diff --git a/codeBB/BB_Code.py b/codeBB/BB_Code.py deleted file mode 100644 index 8c0d337..0000000 --- a/codeBB/BB_Code.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import codeHL -import re - - -## -## @brief Transcode balise : -## [code language=cpp] -## int main(void) { -## return 0; -## } -## [/code] -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - #value = re.sub(r'\[code(( |\t|\n|\r)+style=(.*))?\](.*?)\[/code\]', - value = re.sub(r'\[code(( |\t|\n|\r)+style=(.*?))?\](.*?)\[/code\]', - replace_code, #r'
\4
', - value, - flags=re.DOTALL) - - # TODO : remove the basic indentation of the element (to have a better display in the text tutorial ... - return value - - - -def replace_code(match): - if match.group() == "": - return "" - #debug.info("plop: " + str(match.groups())) - value = codeHL.transcode(match.groups()[2], match.groups()[3]) - return '
' + value + '
' - diff --git a/codeBB/BB_Image.py b/codeBB/BB_Image.py deleted file mode 100644 index 7428cfb..0000000 --- a/codeBB/BB_Image.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode balise: -## [img w=125 h=45]dossier/image.jpg[/img] -## [img w=125 h=45]http://plop.com/dossier/image.png[/img] -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - return value - - diff --git a/codeBB/BB_IndentAndDot.py b/codeBB/BB_IndentAndDot.py deleted file mode 100644 index a4de40b..0000000 --- a/codeBB/BB_IndentAndDot.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode -## commencez les ligne par ":" comme: -## : 1 -## : 2 -## ::2.1 -## ::2.2 -## :::2.2.1 -## ::::2.2.1.1 -## :::::2.2.1.1.1 -## ::2.3 -## :3 -## resultat: -## -## 1 -## 2 -## 2.1 -## 2.2 -## 2.2.1 -## 2.2.1.1 -## 2.3 -## 3 -## -## note: lorsque vous sautez une ligne, la liste sarraite et en recommence une autre... -## -## Il est possible de mettre des ":" sans ligne appres ce qui genere une ligne vide.. -## -## AND DOT -## **Ma ligne2 star consecutives engendrent des points quelque soit la position dans la ligne... -## -## Resultat: -## -## * premiere ligne -## * deusieme ligne -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'\n:', - r'\n:INDENT:', - value) - p = re.compile('((\:INDENT\:(.*?)\n)*)', - flags=re.DOTALL) - value = p.sub(replace_wiki_identation, - value) - - value = re.sub(r'\*\*(.*?)\n', - r'
  • \1
  • ', - value, - flags=re.DOTALL) - - return value - - -def replace_wiki_identation(match): - if match.group() == "": - return "" - #debug.verbose("plop: " + str(match.group())) - value = "" - return transcode(value) diff --git a/codeBB/BB_Link.py b/codeBB/BB_Link.py deleted file mode 100644 index a8dc88b..0000000 --- a/codeBB/BB_Link.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode: -## [http://votre_site.con] => http://votre_site.con -## [http://votre_site.con | text displayed] => text displayed -## [http://votre_site.con text displayed] => text displayed. -## -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - - # named link : [[http://plop.html | link name]] - value = re.sub(r'\[\[http://(.*?) \| (.*?)\]\]', - r'\2', - value) - - # direct link : [[http://plop.html]] - value = re.sub(r'\[\[http://(.*?)\]\]', - r'http://\1', - value) - - # direct lib link : [lib[libname]] - value = re.sub(r'\[lib\[(.*?) \| (.*?)\]\]', - r'\2', - value) - - value = re.sub(r'\[doc\[(.*?) \| (.*?)\]\]', - r'\2', - value) - - value = re.sub(r'\[tutorial\[(.*?) \| (.*?)\]\]', - r'\2', - value) - - value = re.sub(r'\[class\[(.*?)\]\]', - replace_link_class, - value) - - """ - p = re.compile('\[\[(.*?)(|(.*?))\]\])', - flags=re.DOTALL) - value = p.sub(replace_link, - value) - """ - return value - -""" -def replace_link(match): - if match.group() == "": - return "" - #debug.verbose("plop: " + str(match.group())) - value = "" - return transcode(value) -""" - -def replace_link_class(match): - if match.group() == "": - return "" - #debug.info("plop: " + str(match.group())) - className = match.groups()[0] - value = re.sub(':', '_', className) - return '' + className + '' - - - diff --git a/codeBB/BB_Specification.py b/codeBB/BB_Specification.py deleted file mode 100644 index dcb6a61..0000000 --- a/codeBB/BB_Specification.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode thales specification ... -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - return value - - diff --git a/codeBB/BB_Table.py b/codeBB/BB_Table.py deleted file mode 100644 index 97bb348..0000000 --- a/codeBB/BB_Table.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode table: -## { | tableau_type_1 -## | [b]colone 1[/b] -## ligne 1 -## | colone 2 ligne 1 -## |--- -## | colone 1 ligne 1 -## | colone 2 ligne 2 -## |} -## Avec autant de ligne et de colone que vous voullez.. -## Il est possible de faire des retour a la ligne dans une case du tableau... -## En bref sa tend a marcher comme sur un Wiki... -## -## result: -## +-------------------------------------+ -## | colone 1 | -## +------------------+------------------+ -## | ligne 1 | colone 2 ligne 1 | -## +------------------+------------------+ -## | colone 1 ligne 1 | colone 2 ligne 2 | -## +------------------+------------------+ -## -## TODO : Create simple table like : -## | colone 1 || -## | ligne 1 | colone 2 ligne 1 | -## | colone 1 ligne 1 | colone 2 ligne 2| -## @param[in] value String to transform. -## @return Transformed string. 
-## -def transcode(value): - - return value - - diff --git a/codeBB/BB_Text.py b/codeBB/BB_Text.py deleted file mode 100644 index c7a8c0d..0000000 --- a/codeBB/BB_Text.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode . -## [b]texte ici[/b] -## [i]texte ici[/i] -## [u]texte ici[/u] -## [strike]texte ici[/strike] -## [color=olive]texte ici[/color] -## [color=#456FF33F]texte ici[/color] -## Left : [left]texte ici[/left] -## Center : [center]texte ici[/center] -## Right : [right]texte ici[/right] -## [size=22]sdfgsdfgsdgsfd[/size] -## [cadre]mettre les code ici[/cadre] -## @param[in] string String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'\[b\](.*?)\[/b\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[i\](.*?)\[/i\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[u\](.*?)\[/u\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[sup\](.*?)\[/sup\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[sub\](.*?)\[/sub\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[color=(\#[0-9A-F]{6}|[a-z\-]+)\](.*?)\[/color\]', - r'\2', - value, - flags=re.DOTALL) - - value = re.sub(r'\[center\](.*)\[/center\]', - r'
    \1
    ', - value, - flags=re.DOTALL) - - value = re.sub(r'\[right\](.*?)\[/right\]', - r'
    \1
    ', - value, - flags=re.DOTALL) - - value = re.sub(r'\[left\](.*?)\[/left\]', - r'
    \1
    ', - value, - flags=re.DOTALL) - - value = re.sub(r'\[strike\](.*?)\[/strike\]', - r'\1', - value, - flags=re.DOTALL) - - value = re.sub(r'\[size=(.*?)\](.*?)\[/size\]', - r'\2', - value, - flags=re.DOTALL) - - value = re.sub(r'\[cadre\](.*?)\[/cadre\]', - r'
    \1
', - value, - flags=re.DOTALL) - - value = re.sub(r'____(.*?)\n', - r'<hr/>
    ', - value, - flags=re.DOTALL) - - return value diff --git a/codeBB/BB_Title.py b/codeBB/BB_Title.py deleted file mode 100644 index 7c68199..0000000 --- a/codeBB/BB_Title.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode . -## =?=Page Title=?= -## ==Title 1== -## ===Title 2=== -## ====Title 3==== -## =====Title 4===== -## ======Title 5====== -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'=\?=(.*?)=\?=', - r'

    \1

    ', - value) - - value = re.sub(r'\n======(.*?)======', - r'\n
    \1
    ', - value) - - value = re.sub(r'\n=====(.*?)=====', - r'\n

    \1

    ', - value) - - value = re.sub(r'\n====(.*?)====', - r'\n

    \1

    ', - value) - - value = re.sub(r'\n===(.*?)===', - r'\n

    \1

    ', - value) - - value = re.sub(r'\n==(.*?)==', - r'\n

    \1

    ', - '\n' + value) - - value = value[1:] - - return value - - diff --git a/codeBB/BB_comment.py b/codeBB/BB_comment.py deleted file mode 100644 index 4ae35e3..0000000 --- a/codeBB/BB_comment.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode balise: -## /* ... */ -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'\/\*(.*?)\*\/', - r'', - value, - flags=re.DOTALL) - """ - value = re.sub(r'\/\/(.*?)\n', - r'', - value) - """ - return value - - diff --git a/codeBB/BB_lineReturn.py b/codeBB/BB_lineReturn.py deleted file mode 100644 index a29c7b9..0000000 --- a/codeBB/BB_lineReturn.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode balise: -## \n\n ==>
<br/>
-## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'\r\n', - r'\n', - value) - - value = re.sub(r'\n\n', - r'<br/>', - value) - - value = re.sub(r'<br/>', - r'<br/>
    \n', - value) - - return value - - diff --git a/codeBB/codeBB.py b/codeBB/codeBB.py deleted file mode 100644 index 00231f2..0000000 --- a/codeBB/codeBB.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re -import BB_Title -import BB_Text -import BB_IndentAndDot -import BB_Link -import BB_Image -import BB_Table - -import BB_comment -import BB_lineReturn -import BB_Code -import BB_Specification - -## -## @brief Transcode input data in the corect format. -## @param[in] string String to transform. -## @return Transformed string. -## -def transcode(value): - # remove html property - value = re.sub(r'<', r'<', value) - value = re.sub(r'>', r'>', value) - value = BB_comment.transcode(value) - value = BB_Title.transcode(value) - value = BB_Text.transcode(value) - value = BB_IndentAndDot.transcode(value) - value = BB_Link.transcode(value) - value = BB_Image.transcode(value) - value = BB_Table.transcode(value) - value = BB_Code.transcode(value) - value = BB_Specification.transcode(value) - value = BB_lineReturn.transcode(value) - return value - -## -## @brief transcode a BBcode file in a html file -## @return True if the file is transformed -## -def transcode_file(inputFileName, outputFileName): - inData = lutinTools.FileReadData(inputFileName) - if inData == "": - return False - outData = transcode(inData) - debug.warning(" out: " + outputFileName) - lutinTools.FileWriteData(outputFileName, outData) - return True - - diff --git a/codeHL/codeHL.py b/codeHL/codeHL.py deleted file mode 100644 index 30a8a42..0000000 --- a/codeHL/codeHL.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import codeHLcpp -import codeHLBBcode -import codeHLJava -import codeHLjson -import codeHLPython -import codeHLXML -import codeHLshell - - -def transcode(type, value): - if type == "c++": - value = codeHLcpp.transcode(value) - elif type == "java": - value = codeHLJava.transcode(value) - elif type == "bbcode": - value = codeHLBBcode.transcode(value) - elif type == "python": - value = codeHLPython.transcode(value) - elif type == "json": - value = codeHLjson.transcode(value) - elif type == "xml": - value = codeHLXML.transcode(value) - elif type == "shell": - value = codeHLshell.transcode(value) - - return value - diff --git a/codeHL/codeHLBBcode.py b/codeHL/codeHLBBcode.py deleted file mode 100644 index d91f2d0..0000000 --- a/codeHL/codeHLBBcode.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -def transcode(value): - return value diff --git a/codeHL/codeHLJava.py b/codeHL/codeHLJava.py deleted file mode 100644 index d91f2d0..0000000 --- a/codeHL/codeHLJava.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -def transcode(value): - return value diff --git a/codeHL/codeHLPython.py b/codeHL/codeHLPython.py deleted file mode 100644 index d91f2d0..0000000 --- a/codeHL/codeHLPython.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -def transcode(value): - return value diff --git a/codeHL/codeHLXML.py b/codeHL/codeHLXML.py deleted file mode 100644 index d91f2d0..0000000 --- a/codeHL/codeHLXML.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -def transcode(value): - return value diff --git 
a/codeHL/codeHLcpp.py b/codeHL/codeHLcpp.py deleted file mode 100644 index ab2f79f..0000000 --- a/codeHL/codeHLcpp.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - -listRegExp = [ - [ r'/\*\*(.*?)\*/', 'code-doxygen'], - [ r'/\*(.*?)\*/', 'code-comment'], - [ r'//!(.*?)\n', 'code-doxygen'], - [ r'//(.*?)\n', 'code-comment'], - [ r'#(.*?)\n', 'code-preproc'], - [ r'"((\\"|.)*?)"', 'code-text-quote'], - [ r"'(('|.)*?)'", 'code-text-quote'], - [ r'(inline|const|class|virtual|private|public|protected|friend|const|extern|auto|register|static|volatile|typedef|struct|union|enum)', - 'code-storage-keyword'], - [ r'(bool|BOOL|char(16_t|32_t)?|double|float|u?int(8|16|32|64|128)?(_t)?|long|short|signed|size_t|unsigned|void|(I|U)(8|16|32|64|128))', - 'code-type'], - [ r'(((0(x|X)[0-9a-fA-F]*)|(\d+\.?\d*|\.\d+)((e|E)(\+|\-)?\d+)?)(L|l|UL|ul|u|U|F|f)?)', - 'code-number'], - [ r'(m_[A-Za-z_0-9])', - 'code-member'], - [ r'(( |\t)_[A-Za-z_0-9]*)', - 'code-input-function'], - [ r'(return|goto|if|else|case|default|switch|break|continue|while|do|for|sizeof)( |\t|\(|\{)', - 'code-keyword'], - [ r'((new|delete|try|catch|memset|fopen|fread|fwrite|fgets|fclose|printf|(f|s|diag_)printf|calloc|malloc|realloc|(cyg|sup)_([a-z]|[A-Z]|[0-9]|_)+)( |\t|\())', - 'code-function-system'], - [ r'((\w|_)+[ \t]*\()', - 'code-function-name'], - [ r'(NULL|MAX|MIN|__LINE__|__DATA__|__FILE__|__func__|__TIME__|__STDC__)', - 'code-generic-define'], - [ r'([A-Z_][A-Z_0-9]{3,500})', - 'code-macro"'], - [ r'(==|>=|<=|!=|>{1,2}|<{1,2}|&&|\{|\})', - 'code-operator'], - [ r'(true|TRUE|false|FALSE)', - '' - outValue += result.group() - outValue += '' - - # change the input value - inValue = inValue[result.end():] - # Search again ... - result = re.search(reg1, inValue, re.DOTALL) - outValue += inValue - return outValue diff --git a/codeHL/codeHLjson.py b/codeHL/codeHLjson.py deleted file mode 100644 index acfafb2..0000000 --- a/codeHL/codeHLjson.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -def transcode(value): - return value - diff --git a/codeHL/codeHLshell.py b/codeHL/codeHLshell.py deleted file mode 100644 index 67ccb0b..0000000 --- a/codeHL/codeHLshell.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - -listRegExp = [ - [ r'#(.*?)\n', r'#\1\n'] -] - -def transcode(value): - for reg1, reg2 in listRegExp: - value = re.sub(reg1, reg2, value, flags=re.DOTALL) - return value diff --git a/cppParser/Class.py b/cppParser/Class.py deleted file mode 100644 index e3fed09..0000000 --- a/cppParser/Class.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Node - - -## -## @brief transform template descrption in one element. -## @param[in] list of elements. ex : 'public', 'ewol::classee', '<', 'plop', '<', 'uint8_t', ',', 'int32_t', '>', '>' -## @return a simplify list. ex : 'public', 'ewol::classee>' -## -def concatenate_template(list): - # TODO ... - return list - -class Class(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - # check input : - if len(stack) < 2: - debug.error("Can not parse class : " + str(stack)) - return - Node.Node.__init__(self, 'class', stack[1], file, lineNumber) - self.subList = [] - self.access = "private" - # heritage list : - self.inherit = [] - if len(stack) == 2: - # just a simple class... 
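# Illustrative example (input values assumed, not taken from the original file):
# a header line such as "class Bar : public Foo, private Plop" hands this
# constructor the token stack
#     ['class', 'Bar', ':', 'public', 'Foo', ',', 'private', 'Plop']
# so stack[1] becomes the node name and the loop below fills self.inherit with
#     [{'access': 'public', 'class': 'Foo'}, {'access': 'private', 'class': 'Plop'}]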
- return - if len(stack) == 3: - debug.error("error in parsing class : " + str(stack)) - return - if stack[2] != ':': - debug.error("error in parsing class : " + str(stack) + " missing ':' at the 3rd position ...") - - list = concatenate_template(stack[3:]) - debug.verbose("inherit : " + str(list)) - access = "private" - for element in list: - if element in ['private', 'protected', 'public']: - access = element - elif element == ',': - pass - else: - self.inherit.append({'access' : access, 'class' : element}) - - debug.verbose("class : " + self.to_str()) - - def to_str(self) : - ret = "class " + self.name - if len(self.inherit) != 0 : - ret += " : " - isFirst = True - for element in self.inherit: - if isFirst == False: - ret += ", " - isFirst = False - ret += element['access'] + " " + element['class'] - ret += " { ... };" - return ret - - - diff --git a/cppParser/Enum.py b/cppParser/Enum.py deleted file mode 100644 index 17a7f6b..0000000 --- a/cppParser/Enum.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Node - -class Enum(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - self.baseValue = 0; - # check input : - if len(stack) < 2: - debug.error("Can not parse class : " + str(stack)) - return - self.typedef = False - if stack[0] == 'typedef': - self.typedef = True - stack[1:] - - Node.Node.__init__(self, 'enum', stack[1], file, lineNumber) - - self.listElement = [] - - def to_str(self) : - return "enum " + self.name + " { ... };" - - def enum_append(self, stack): - subList = [] - tmp = [] - for element in stack: - if element == ',': - subList.append(tmp) - tmp = [] - else: - tmp.append(element) - if len(tmp) != 0: - subList.append(tmp) - - #debug.verbose(" TODO : Need to append enum : " + str(subList)) - for element in subList: - value = "" - if len(element) > 2: - if element[1] == '=': - for tmp in element[2:]: - value += tmp - self.listElement.append({'name' : element[0], 'value' : value}) - - debug.verbose("enum list : " + str(self.listElement)) \ No newline at end of file diff --git a/cppParser/Library.py b/cppParser/Library.py deleted file mode 100644 index ed0c803..0000000 --- a/cppParser/Library.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/python -import LutinDebug as debug -import os -import sys -import re - -class Libray(): - def __init__(self, libName): - self.name = libName - # CPP section: - self.namespaces = [] - self.classes = [] - # C section: - self.structs = [] - self.variables = [] - self.methodes = [] - self.unions = [] - self.types = [] - - - diff --git a/cppParser/Methode.py b/cppParser/Methode.py deleted file mode 100644 index a463148..0000000 --- a/cppParser/Methode.py +++ /dev/null @@ -1,102 +0,0 @@ -##!/usr/bin/python -import lutinDebug as debug -import Node -import Type -import Variable - -class Methode(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - name = "" - type = 'methode' - self.virtual = False - self.virtualPure = False - self.static = False - self.inline = False - self.const = False # the end of line cont methode is sont for the class ... - - # remove constructer inside declaration ... 
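# Illustrative example (declaration assumed): a prototype such as
#     virtual void draw ( int x ) const = 0
# reaches this constructor as the stack
#     ['virtual', 'void', 'draw', '(', 'int', 'x', ')', 'const', '=', '0'];
# the trailing '= 0', the leading 'virtual' and the trailing 'const' are peeled
# off as flags, 'draw' (the token before '(') becomes the name, ['void'] feeds
# the return Type and ['int', 'x'] feeds a Variable parameter entry.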
- if ':' in stack: - res = [] - for element in stack: - if element != ':': - res.append(element) - else: - break - stack = res - - if stack[len(stack)-2] == '=' \ - and stack[len(stack)-1] == '0': - stack = stack[:len(stack)-2] - self.virtualPure = True - - if stack[0] == 'virtual': - self.virtual = True - stack = stack[1:] - if stack[0] == 'static': - self.static = True - stack = stack[1:] - if stack[0] == 'inline': - self.inline = True - stack = stack[1:] - if stack[len(stack)-1] == 'const': - self.const = True - stack = stack[:len(stack)-1] - - namePos = -1 - - debug.verbose("methode parse : " + str(stack)) - for iii in range(0, len(stack)-2): - if stack[iii+1] == '(': - name = stack[iii] - namePos = iii - break; - - if namePos == 0: - debug.verbose("start with '" + str(name[0]) + "'") - if name[0] == '~': - type = 'destructor' - else: - type = 'constructor' - debug.verbose("methode name : " + name) - Node.Node.__init__(self, type, name, file, lineNumber) - - self.returnType = Type.TypeNone() - self.variable = [] - - # create the return Type (Can be Empty) - retTypeStack = stack[:namePos] - debug.verbose("return : " + str(retTypeStack)) - self.returnType = Type.Type(retTypeStack) - - parameterStack = stack[namePos+2:len(stack)-1] - debug.verbose("parameter : " + str(parameterStack)) - paramTmp = [] - for element in parameterStack: - if element == ',': - self.variable.append(Variable.Variable(paramTmp)) - paramTmp = [] - else: - paramTmp.append(element) - if len(paramTmp) != 0: - self.variable.append(Variable.Variable(paramTmp)) - - def to_str(self): - ret = "" - if self.virtual == True: - ret += "virtual " - if self.static == True: - ret += "static " - if self.inline == True: - ret += "inline " - ret += self.returnType.to_str() - ret += " " - ret += self.name - ret += "(" - # ... - ret += ")" - if self.virtualPure == True: - ret += " = 0" - if self.const == True: - ret += " const" - return ret - \ No newline at end of file diff --git a/cppParser/Namespace.py b/cppParser/Namespace.py deleted file mode 100644 index fa3817c..0000000 --- a/cppParser/Namespace.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Node - -class Namespace(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - if len(stack) != 2: - debug.error("Can not parse namespace : " + str(stack)) - Node.Node.__init__(self, 'namespace', stack[1], file, lineNumber) - # enable sub list - self.subList = [] - - debug.verbose("find namespace : " + self.to_str()) - - def to_str(self) : - return "namespace " + self.name + " { ... 
};" - - diff --git a/cppParser/Node.py b/cppParser/Node.py deleted file mode 100644 index 69a7c39..0000000 --- a/cppParser/Node.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug - -accessList = ['private', 'protected', 'public'] - -def debug_space(level): - ret = "" - for iii in range(0,level): - ret += " " - return ret - -class Node(): - def __init__(self, type, name="", file="", lineNumber=0): - self.nodeType = type - self.name = name - self.doc = None - self.fileName = file - self.lineNumber = lineNumber - self.subList = None - self.access = None - - def to_str(self): - return "" - - def str(self): - return self.to_str() - - def get_node_type(self): - return self.nodeType - - def get_name(self): - return self.name - - def debug_display(self, level=0, access = None): - if access == 'private': - debug.info(debug_space(level) + "- " + self.nodeType + " => " + self.name) - elif access == 'protected': - debug.info(debug_space(level) + "# " + self.nodeType + " => " + self.name) - elif access == 'public': - debug.info(debug_space(level) + "+ " + self.nodeType + " => " + self.name) - else: - debug.info(debug_space(level) + self.nodeType + " => " + self.name) - if self.subList!= None: - for element in self.subList: - if 'access' in element.keys(): - element['node'].debug_display(level+1, element['access']) - else: - element['node'].debug_display(level+1) - - def set_access(self, access): - if access not in accessList: - debug.warning("This is not a valid access : '" + access + "' : availlable : " + str(accessList)) - return - if self.access == None: - debug.error("This Node does not support acces configuration...") - return - self.access = access - - def get_access(self): - return self.access - - def append(self, newSubElement): - # just add it in a sub List : - if self.subList == None: - debug.error("can not add a '" + newSubElement.nodeType + "' at this '" + self.nodeType + "'") - return - if newSubElement.get_node_type() != 'namespace': - if self.access == None: - self.subList.append({'node' : newSubElement}) - else: - self.subList.append({'access' : self.access, 'node' : newSubElement}) - return - - # check if the element already exist - for element in self.subList: - if element['node'].get_node_type() == 'namespace': - if element['node'].get_name() == newSubElement.get_name(): - debug.verbose("fusionate with previous declaration") - element['node'].fusion(newSubElement) - return - # normal case adding : - if self.access == None: - self.subList.append({'node' : newSubElement}) - else: - self.subList.append({'access' : self.access, 'node' : newSubElement}) - - ## - ## @ brief only for namespace : - ## - ## - def fusion(self, addedElement): - for element in addedElement.subList: - self.append(element['node']) - - ## - ## @brief Get the list of all specify type - ## @param[in] type Type requested ['namespace', 'class', 'struct', 'methode', 'enum', 'define', 'union', 'variable', 'constructor', 'destructor'] (can be a list) - ## @param[in] sorted Request to sort the return list. - ## @return The requested list or [] - ## - def get_all_sub_type(self, type='all', sorted = False): - if type == 'all': - return self.subList - if isinstance(type, list) == False: - type = [type] - if self.subList == None: - return [] - ret = [] - for element in self.subList: - if element['node'].get_node_type() in type: - ret.append(element) - if sorted == True: - # TODO : Sorted the list ... 
- pass - return ret - - - -class MainNode(Node): - def __init__(self, type="library", name=""): - Node.__init__(self, type, name) - self.subList = [] \ No newline at end of file diff --git a/cppParser/Parse.py b/cppParser/Parse.py deleted file mode 100644 index f9aa8ac..0000000 --- a/cppParser/Parse.py +++ /dev/null @@ -1,494 +0,0 @@ -#!/usr/bin/python -import os -import sys -import re - -import lex - -import inspect -import lutinDebug as debug -import lutinTools -import Class -import Namespace -import Struct -import Union -import Methode -import Enum -import Variable -import Node - -tokens = [ - 'NUMBER', - 'NAME', - 'OPEN_PAREN', - 'CLOSE_PAREN', - 'OPEN_BRACE', - 'CLOSE_BRACE', - 'OPEN_SQUARE_BRACKET', - 'CLOSE_SQUARE_BRACKET', - 'COLON', - 'SEMI_COLON', - 'COMMA', - 'TAB', - 'BACKSLASH', - 'PIPE', - 'PERCENT', - 'EXCLAMATION', - 'CARET', - 'COMMENT_SINGLELINE', - 'COMMENT_MULTILINE', - 'PRECOMP_MACRO', - 'PRECOMP_MACRO_CONT', - 'ASTERISK', - 'AMPERSTAND', - 'EQUALS', - 'MINUS', - 'PLUS', - 'DIVIDE', - 'CHAR_LITERAL', - 'STRING_LITERAL', - 'NEW_LINE', - 'SQUOTE', -] - -t_ignore = " \r.?@\f" -t_NUMBER = r'[0-9][0-9XxA-Fa-f]*' -t_NAME = r'[<>A-Za-z_~][A-Za-z0-9_]*' -t_OPEN_PAREN = r'\(' -t_CLOSE_PAREN = r'\)' -t_OPEN_BRACE = r'{' -t_CLOSE_BRACE = r'}' -t_OPEN_SQUARE_BRACKET = r'\[' -t_CLOSE_SQUARE_BRACKET = r'\]' -t_SEMI_COLON = r';' -t_COLON = r':' -t_COMMA = r',' -t_TAB = r'\t' -t_BACKSLASH = r'\\' -t_PIPE = r'\|' -t_PERCENT = r'%' -t_CARET = r'\^' -t_EXCLAMATION = r'!' -t_PRECOMP_MACRO = r'\#.*' -t_PRECOMP_MACRO_CONT = r'.*\\\n' -def t_COMMENT_SINGLELINE(t): - r'\/\/.*\n' - global doxygenCommentCache - if t.value.startswith("///") or t.value.startswith("//!"): - if doxygenCommentCache: - doxygenCommentCache += "\n" - if t.value.endswith("\n"): - doxygenCommentCache += t.value[:-1] - else: - doxygenCommentCache += t.value - t.lexer.lineno += len(filter(lambda a: a=="\n", t.value)) -t_ASTERISK = r'\*' -t_MINUS = r'\-' -t_PLUS = r'\+' -t_DIVIDE = r'/(?!/)' -t_AMPERSTAND = r'&' -t_EQUALS = r'=' -t_CHAR_LITERAL = "'.'" -t_SQUOTE = "'" -#found at http://wordaligned.org/articles/string-literals-and-regular-expressions -#TODO: This does not work with the string "bla \" bla" -t_STRING_LITERAL = r'"([^"\\]|\\.)*"' -#Found at http://ostermiller.org/findcomment.html -def t_COMMENT_MULTILINE(t): - r'/\*([^*]|\n|(\*+([^*/]|\n)))*\*+/' - global doxygenCommentCache - if t.value.startswith("/**") or t.value.startswith("/*!"): - #not sure why, but get double new lines - v = t.value.replace("\n\n", "\n") - #strip prefixing whitespace - v = re.sub("\n[\s]+\*", "\n*", v) - doxygenCommentCache += v - t.lexer.lineno += len(filter(lambda a: a=="\n", t.value)) -def t_NEWLINE(t): - r'\n+' - t.lexer.lineno += len(t.value) - -def t_error(v): - print( "Lex error: ", v ) - -lex.lex() - -doxygenCommentCache = "" - - -## -## @brief Join the class name element : ['class', 'Bar', ':', ':', 'Foo'] -> ['class', 'Bar::Foo'] -## @param table Input table to convert. ex: [':', '\t', 'class', 'Bar', ':', ':', 'Foo'] -## @return The new table. 
ex: ['class', 'Bar::Foo'] -## -def create_compleate_class_name(table): - if "::" not in "".join(table): - out = table - else: - # we need to convert it : - out = [] - for name in table: - if len(out) == 0: - out.append(name) - elif name == ":" \ - and out[-1].endswith(":"): - out[-1] += name - elif out[-1].endswith("::"): - out[-2] += out[-1] + name - del out[-1] - else: - out.append(name) - table = out - if 'operator' not in "".join(table): - out = table - else: - out = [] - for name in table: - if len(out) == 0: - out.append(name) - elif name in ['<','>','='] \ - and out[-1][:8] == 'operator' \ - and len(out[-1])-8 < 2: - out[-1] += name - else: - out.append(name) - - return out - - -class parse_file(): - - def gen_debug_space(self): - ret = "[" + str(len(self.braceDepthType)+1) + "]" - for iii in range(0,len(self.braceDepthType)): - ret += " " - return ret - - def fusion(self, baseNode): - baseNode.fusion(self.mainNode) - return baseNode - - def __init__(self, fileName): - self.mainNode = Node.MainNode("main-node", "tmp") - self.m_elementParseStack = [] - debug.debug("Parse file : '" + fileName + "'") - - self.headerFileName = fileName - - self.anon_union_counter = [-1, 0] - # load all the file data : - headerFileStr = lutinTools.FileReadData(fileName) - - # Strip out template declarations - # TODO : What is the real need ??? - headerFileStr = re.sub("template[\t ]*<[^>]*>", "", headerFileStr) - # remove all needed \r unneeded ==> this simplify next resExp ... - headerFileStr = re.sub("\r", "\r\n", headerFileStr) - headerFileStr = re.sub("\r\n\n", "\r\n", headerFileStr) - headerFileStr = re.sub("\r", "", headerFileStr) - # TODO : Can generate some error ... - headerFileStr = re.sub("\#if 0(.*?)(\#endif|\#else)", "", headerFileStr, flags=re.DOTALL) - headerFileafter = re.sub("\@interface(.*?)\@end", "", headerFileStr, flags=re.DOTALL) - if headerFileStr != headerFileafter : - debug.debug(" Objective C interface ... ==> not supported") - return - - debug.verbose(headerFileStr) - - # Change multi line #defines and expressions to single lines maintaining line nubmers - matches = re.findall(r'(?m)^(?:.*\\\n)+.*$', headerFileStr) - is_define = re.compile(r'[ \t\v]*#[Dd][Ee][Ff][Ii][Nn][Ee]') - for m in matches: - #Keep the newlines so that linecount doesnt break - num_newlines = len(filter(lambda a: a=="\n", m)) - if is_define.match(m): - new_m = m.replace("\n", "<**multiLine**>\\n") - else: - # Just expression taking up multiple lines, make it take 1 line for easier parsing - new_m = m.replace("\\\n", " ") - if (num_newlines > 0): - new_m += "\n"*(num_newlines) - headerFileStr = headerFileStr.replace(m, new_m) - - #Filter out Extern "C" statements. These are order dependent - headerFileStr = re.sub(r'extern( |\t)+"[Cc]"( |\t)*{', "{", headerFileStr) - - ###### debug.info(headerFileStr) - self.stack = [] # token stack to find the namespace and the element name ... 
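# Note on the two token stacks kept by this parser (summarised from the loop below):
# self.stack accumulates every raw token since the last flush point, while
# self.nameStack keeps only the tokens that describe the current declaration;
# nameStack is interpreted and cleared whenever an opening brace, a closing brace,
# a ';' or an access-specifier ':' is met.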
- self.nameStack = [] # - self.braceDepth = 0 - self.braceDepthType = [] - self.subModuleCountBrace = 0; - lex.lex() - lex.input(headerFileStr) - self.curLine = 0 - self.curChar = 0 - while True: - tok = lex.token() - if not tok: - break - debug.debug("TOK: " + str(tok)) - self.stack.append( tok.value ) - self.curLine = tok.lineno - self.curChar = tok.lexpos - # special case to remove internal function define in header: - if self.previous_is('function') == True: - if tok.type == 'OPEN_BRACE': - self.subModuleCountBrace += 1 - elif tok.type == 'CLOSE_BRACE': - self.subModuleCountBrace -= 1 - if self.subModuleCountBrace <= 0: - self.brace_type_pop() - continue - # normal case: - if (tok.type in ('PRECOMP_MACRO', 'PRECOMP_MACRO_CONT')): - debug.debug("PRECOMP: " + str(tok)) - self.stack = [] - self.nameStack = [] - # Do nothing for macro ==> many time not needed ... - continue - if tok.type == 'OPEN_BRACE': - # When we open a brace, this is the time to parse the stack ... - # Clean the stack : (remove \t\r\n , and concatenate the 'xx', ':', ':', 'yy' in 'xx::yy', - self.nameStack = create_compleate_class_name(self.nameStack) - if len(self.nameStack) <= 0: - #open brace with no name ... - self.brace_type_push('empty', []) - elif is_a_function(self.nameStack): - # need to parse sub function internal description... - self.subModuleCountBrace = 1 - self.brace_type_push('function', self.nameStack) - elif 'namespace' in self.nameStack: - self.brace_type_push('namespace', self.nameStack) - elif 'class' in self.nameStack: - self.brace_type_push('class', self.nameStack) - elif 'enum' in self.nameStack: - self.brace_type_push('enum', self.nameStack) - elif 'struct' in self.nameStack: - self.brace_type_push('struct', self.nameStack) - elif 'typedef' in self.nameStack: - self.brace_type_push('typedef', self.nameStack) - elif 'union' in self.nameStack: - self.brace_type_push('union', self.nameStack) - else: - self.brace_type_push('unknow', self.nameStack) - self.stack = [] - self.nameStack = [] - elif tok.type == 'CLOSE_BRACE': - if len(self.nameStack) != 0: - if self.previous_is('enum') == True: - self.brace_type_append('enum list', self.nameStack); - else: - debug.warning(self.gen_debug_space() + "end brace DROP : " + str(self.nameStack)); - self.stack = [] - self.nameStack = [] - self.brace_type_pop() - self.nameStack = create_compleate_class_name(self.nameStack) - if tok.type == 'OPEN_PAREN': - self.nameStack.append(tok.value) - elif tok.type == 'CLOSE_PAREN': - self.nameStack.append(tok.value) - elif tok.type == 'OPEN_SQUARE_BRACKET': - self.nameStack.append(tok.value) - elif tok.type == 'CLOSE_SQUARE_BRACKET': - self.nameStack.append(tok.value) - elif tok.type == 'TAB': - pass - elif tok.type == 'EQUALS': - self.nameStack.append(tok.value) - elif tok.type == 'COMMA': - self.nameStack.append(tok.value) - elif tok.type == 'BACKSLASH': - self.nameStack.append(tok.value) - elif tok.type == 'PIPE': - self.nameStack.append(tok.value) - elif tok.type == 'PERCENT': - self.nameStack.append(tok.value) - elif tok.type == 'CARET': - self.nameStack.append(tok.value) - elif tok.type == 'EXCLAMATION': - self.nameStack.append(tok.value) - elif tok.type == 'SQUOTE': - pass - elif tok.type == 'NUMBER': - self.nameStack.append(tok.value) - elif tok.type == 'MINUS': - self.nameStack.append(tok.value) - elif tok.type == 'PLUS': - self.nameStack.append(tok.value) - elif tok.type == 'STRING_LITERAL': - self.nameStack.append(tok.value) - elif tok.type == 'NAME' \ - or tok.type == 'AMPERSTAND' \ - or tok.type == 'ASTERISK' 
\ - or tok.type == 'CHAR_LITERAL': - self.nameStack.append(tok.value) - elif tok.type == 'COLON': - if self.nameStack[0] in Node.accessList: - debug.debug(self.gen_debug_space() + "change visibility : " + self.nameStack[0]); - self.brace_type_change_access(self.nameStack[0]) - self.nameStack = [] - self.stack = [] - else : - self.nameStack.append(tok.value) - elif tok.type == 'SEMI_COLON': - if len(self.nameStack) != 0: - self.nameStack = create_compleate_class_name(self.nameStack) - if is_a_function(self.nameStack): - self.brace_type_append('function', self.nameStack); - elif 'namespace' in self.nameStack: - debug.debug(self.gen_debug_space() + "find a namespace DECLARATION : " + str(self.nameStack)); - elif 'class' in self.nameStack: - debug.debug(self.gen_debug_space() + "find a class DECLARATION : " + str(self.nameStack)); - elif 'enum' in self.nameStack: - debug.debug(self.gen_debug_space() + "find a enum DECLARATION : " + str(self.nameStack)); - elif 'struct' in self.nameStack: - debug.debug(self.gen_debug_space() + "find a struct DECLARATION : " + str(self.nameStack)); - elif 'typedef' in self.nameStack: - debug.info(self.gen_debug_space() + "find a typedef DECLARATION : " + str(self.nameStack)); - elif 'union' in self.nameStack: - debug.debug(self.gen_debug_space() + "find a union DECLARATION : " + str(self.nameStack)); - else: - if self.previous_is('enum') == True: - self.brace_type_append('enum list', self.nameStack); - else: - # TODO : Check if it is true in all case : - self.brace_type_append('variable', self.nameStack); - #debug.warning(self.gen_debug_space() + "semicolumn : " + str(self.nameStack)); - self.stack = [] - self.nameStack = [] - #self.debug_display(); - - def debug_display(self): - debug.info("Debug display :") - self.mainNode.debug_display(1) - - def create_element(self, type, stack): - ret = None - if type == 'empty' \ - or type == 'enum list': - pass - elif type == 'namespace': - ret = Namespace.Namespace(stack, self.headerFileName, self.curLine) - elif type == 'class': - ret = Class.Class(stack, self.headerFileName, self.curLine) - elif type == 'struct': - ret = Struct.Struct(stack, self.headerFileName, self.curLine) - elif type == 'typedef': - #ret = Namespace.Namespace(stack, self.headerFileName, self.curLine) - # TODO ... 
- pass - elif type == 'union': - ret = Union.Union(stack, self.headerFileName, self.curLine) - elif type == 'function': - ret = Methode.Methode(stack, self.headerFileName, self.curLine) - elif type == 'enum': - ret = Enum.Enum(stack, self.headerFileName, self.curLine) - elif type == 'variable': - ret = Variable.Variable(stack, self.headerFileName, self.curLine) - else: - debug.error("unknow type ...") - return ret - - def brace_type_push(self, type, stack): - debug.debug(self.gen_debug_space() + "find a <<" + type + ">> : " + str(stack)); - myClassElement = self.create_element(type, stack) - element = { 'type' : type, - 'stack' : stack, - 'node' : myClassElement - } - self.braceDepthType.append(element) - #debug.info ("append : " + str(element)) - - def brace_type_append_current(self, element, id = -50): - if id == -50: - id = len(self.braceDepthType)-1 - if id >= 0: - while self.braceDepthType[id]['node'] == None: - # special case for empty brace, just add it to the upper - id -=1 - if id < 0: - break; - if id < 0: - self.mainNode.append(element) - else: - self.braceDepthType[id]['node'].append(element) - - def brace_type_append(self, type, stack): - debug.debug(self.gen_debug_space() + " append a <<" + type + ">> : " + str(stack)); - lastType = self.get_last_type() - newType = self.create_element(type, stack) - if newType != None: - self.brace_type_append_current(newType) - return - # enum sub list: - if lastType == 'enum' \ - and type == 'enum list': - id = len(self.braceDepthType)-1 - self.braceDepthType[id]['node'].enum_append(stack) - return - debug.info("TODO : Parse the special type") - - def brace_type_pop(self): - id = len(self.braceDepthType)-1 - if id < 0: - debug.warning("Try to pop the stack with No more element ...") - return - if self.braceDepthType[id]['node'] == None: - # nothing to add at the upper ... 
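# Behaviour sketch (wording assumed): when the matching closing brace is reached,
# the node opened by brace_type_push() is detached from braceDepthType and appended
# to the nearest enclosing node that really exists (empty braces are skipped), or
# to the main library node when nothing encloses it; only then is the stack popped.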
- pass - else: - # add it on the previous - self.brace_type_append_current(self.braceDepthType[id]['node'], id-1) - self.braceDepthType.pop() - - def brace_type_change_access(self, newOne): - if newOne not in Node.accessList: - debug.error("unknow access type : " + newOne) - return - id = len(self.braceDepthType)-1 - if id >= 0: - while self.braceDepthType[id]['node'] == None: - # special case for empty brace, just add it to the upper - id -=1 - if id < 0: - break; - if id < 0: - debug.warning("can not change the main access on the library") - else: - if self.braceDepthType[id]['node'].get_access() == None: - debug.error("Can not set access in other as : 'class' or 'struct' :" + str(self.braceDepthType[id])) - return - self.braceDepthType[id]['node'].set_access(newOne) - - def previous_is(self, type): - if self.get_last_type() == type: - return True - return False - - def get_last_type(self): - if len(self.braceDepthType) > 0: - return self.braceDepthType[len(self.braceDepthType)-1]['type'] - return None - -def is_a_function(stack) : - # in a function we need to have functionName + ( + ) - if len(stack) < 3: - return False - if ':' in stack: - res = [] - for element in stack: - if element != ':': - res.append(element) - else: - break - stack = res - if stack[len(stack)-2] == '=' \ - and stack[len(stack)-1] == '0': - stack = stack[:len(stack)-2] - #can end with 2 possibilities : ')', 'const' or ')' - if stack[len(stack)-1] == ')' \ - or ( stack[len(stack)-2] == ')' \ - and stack[len(stack)-1] == 'const'): - return True - return False \ No newline at end of file diff --git a/cppParser/Struct.py b/cppParser/Struct.py deleted file mode 100644 index dde1a1f..0000000 --- a/cppParser/Struct.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Node - -class Struct(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - name = "" - Node.Node.__init__(self, 'struct', name, file, lineNumber) - self.access = "public" - - - def to_str(self) : - return "struct " + self.name + " { ... };" - - diff --git a/cppParser/Type.py b/cppParser/Type.py deleted file mode 100644 index d159f03..0000000 --- a/cppParser/Type.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Type -import Node - -class Type(): - def __init__(self, stack=[]): - self.name = "" - self.const = False # the const xxxxx - self.reference = False - self.constVar = False # the char* const VarName - - if len(stack) == 0: - # None type - return - if len(stack) == 1: - self.name = stack[0] - return; - # check end const - if stack[len(stack)-1] == 'const': - self.constVar = True - stack = stack[:len(stack)-1] - # check if element is a reference ... - if stack[len(stack)-1] == '&': - self.reference = True - stack = stack[:len(stack)-1] - # che k if it start with const ... 
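# Illustrative examples (token shapes assumed) of what this constructor yields:
#     ['const', 'std::string', '&']  ->  to_str() == "const std::string &"
#     ['char', '*', 'const']         ->  to_str() == "char* const"
#     []                             ->  the empty type used by TypeNone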
- if stack[0] == 'const': - self.const = True - stack = stack[1:] - - self.name = "" - for element in stack: - self.name += element - - def to_str(self) : - ret = "" - if self.const == True: - ret += "const " - ret += self.name - if self.reference == True: - ret += " &" - if self.constVar == True: - ret += " const" - return ret - -class TypeVoid(Type): - def __init__(self): - Type.__init__(self, ['void']) - -class TypeNone(Type): - def __init__(self): - Type.__init__(self) - diff --git a/cppParser/Union.py b/cppParser/Union.py deleted file mode 100644 index 3fcb529..0000000 --- a/cppParser/Union.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Node - -class Union(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - name = "" - Node.Node.__init__(self, 'union', name, file, lineNumber) - self.list = [] - - def to_str(self) : - return "union " + self.name + " { ... };" - - diff --git a/cppParser/Variable.py b/cppParser/Variable.py deleted file mode 100644 index 73afa62..0000000 --- a/cppParser/Variable.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import Type -import Node - -class Variable(Node.Node): - def __init__(self, stack=[], file="", lineNumber=0): - name = "" - # TODO : better manageement for xxx[**][**] element: - res = [] - for element in stack: - if element == '[': - break - else: - res.append(element) - stack = res - - if len(stack) < 2: - if stack[0] == 'void': - pass - else: - debug.error("Can not parse variable : " + str(stack)) - else: - name = stack[len(stack)-1] - - Node.Node.__init__(self, 'variable', stack[len(stack)-1], file, lineNumber) - # force the sublist error generation ... - self.subList = None - # default variable : - self.type = Type.TypeVoid() - self.static = False - self.external = False - self.volatile = False - #empty name ... ==> this is really bad ... - if name == "": - return - - if 'static' in stack: - self.static = True - stack = [value for value in stack if value != 'static'] - if 'volatile' in stack: - self.volatile = True - stack = [value for value in stack if value != 'volatile'] - if 'external' in stack: - self.external = True - stack = [value for value in stack if value != 'external'] - - self.type = Type.Type(stack[:len(stack)-1]) - - debug.verbose("find variable : " + self.to_str()) - - def to_str(self) : - ret = "" - if self.external == True: - ret += "external " - if self.volatile == True: - ret += "volatile " - if self.static == True: - ret += "static " - ret += self.type.to_str() - ret += " " - ret += self.name - return ret - diff --git a/lutin.py b/lutin.py index 07954c3..9579bf0 100755 --- a/lutin.py +++ b/lutin.py @@ -41,11 +41,9 @@ def usage(): print " Clean all (same as previous)" print " dump" print " Dump all the module dependency and properties" - print " doc" - print " Create documentation of all module that is mark as availlable on it" listOfAllModule = lutinModule.ListAllModuleWithDesc() for mod in listOfAllModule: - print " " + mod[0] + " / " + mod[0] + "-clean / " + mod[0] + "-dump" + mod[0] + "-doc" + print " " + mod[0] + " / " + mod[0] + "-clean / " + mod[0] + "-dump" print " " + mod[1] print " ex: " + sys.argv[0] + " all --target=Android all -t Windows -m debug all" exit(0) diff --git a/lutinDoc.py b/lutinDoc.py deleted file mode 100644 index a7ec357..0000000 --- a/lutinDoc.py +++ /dev/null @@ -1,269 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -# TODO : Add try of generic input ... 
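# Usage sketch for the doc class defined below (module name and paths assumed):
#     myDoc = doc("ewol")
#     myDoc.set_path("ewol/sources/")             # headers to run through the parser
#     myDoc.set_path_general_doc("ewol/doc/")     # .bb documentation / tutorial pages
#     myDoc.doc_parse_code()
#     myDoc.generate_documantation(target, "out/doc/ewol/")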
-sys.path.append(lutinTools.GetCurrentPath(__file__) + "/ply/ply/") -sys.path.append(lutinTools.GetCurrentPath(__file__) + "/cppParser/") -sys.path.append(lutinTools.GetCurrentPath(__file__) + "/codeBB/") -sys.path.append(lutinTools.GetCurrentPath(__file__) + "/codeHL/") -import Parse -import Node -import lutinDocHtml -import lutinDocMd -import os -import fnmatch - -## -## @brief Main Documantion class -## @param[in] moduleName Name of the module of this element -## -class doc: - def __init__(self, moduleName): - self.moduleName = moduleName - self.listDocFile = [] - self.structureLib = Node.MainNode("library", moduleName) - self.listTutorialFile = [] - self.target = None - self.webSite = "" - self.pathParsing = "" - self.pathGlobalDoc = "" - self.externalLink = [] - self.title = moduleName + " Library" - self.styleHtml = "" - - ## - ## @brief Set the module website (activate only when compile in release mode, else "../moduleName/) - ## @param[in] url New Website url - ## - def set_website(self, url): - self.webSite = url - - ## - ## @brief set the parsing folder - ## @param[in] path New path to parse - ## - def set_path(self, path): - self.pathParsing = path - - ## - ## @brief set the glabal documentation parsing folder - ## @param[in] path New path to parse - ## - def set_path_general_doc(self, path): - self.pathGlobalDoc = path - - ## - ## @brief List of validate external library link (disable otherwise) - ## @param[in] availlable List of all module link availlable - ## - def set_external_link(self, availlable): - self.externalLink = availlable - - ## - ## @brief Set the library title - ## @param[in] title New title to set. - ## - def set_title(self, title): - self.title = title - - ## - ## @brief new html basic css file - ## @param[in] file File of the css style sheet - ## - def set_html_css(self, cssFile): - self.styleHtml = cssFile - - ## - ## @brief Create the module documentation: - ## @param[in,out] target target that request generation of the documentation - ## - def doc_parse_code(self): - if self.pathParsing != "": - for root, dirnames, filenames in os.walk(self.pathParsing): - tmpList = fnmatch.filter(filenames, "*.h") - # Import the module : - for filename in tmpList: - fileCompleteName = os.path.join(root, filename) - debug.debug(" Find a file : '" + fileCompleteName + "'") - self.add_file(fileCompleteName) - # display the hierarchie of all the class and namespace ... - #self.structureLib.debug_display() - if self.pathGlobalDoc != "": - for root, dirnames, filenames in os.walk(self.pathGlobalDoc): - tmpList = fnmatch.filter(filenames, "*.bb") - # Import the module : - for filename in tmpList: - fileCompleteName = os.path.join(root, filename) - tutorialPath = os.path.join(self.pathGlobalDoc, "tutorial/") - debug.verbose(" Find a doc file : '" + fileCompleteName + "'") - pathBase = fileCompleteName[len(self.pathGlobalDoc):len(fileCompleteName)-3] - if fileCompleteName[:len(tutorialPath)] == tutorialPath: - self.add_file_doc(fileCompleteName, pathBase) - else: - self.add_tutorial_doc(fileCompleteName, pathBase) - - ## - ## @brief Add a documentation file at the parsing system - ## @param[in] filename File To add at the parsing element system. - ## @param[in] outPath output system file. 
- ## @return True if no error occured, False otherwise - ## - def add_file_doc(self, filename, outPath): - debug.debug("adding file in documantation : '" + filename + "'"); - self.listDocFile.append([filename, outPath]) - - ## - ## @brief Add a documentation file at the parsing system - ## @param[in] filename File To add at the parsing element system. - ## @param[in] outPath output system file. - ## @return True if no error occured, False otherwise - ## - def add_tutorial_doc(self, filename, outPath): - debug.debug("adding file in documantation : '" + filename + "'"); - self.listTutorialFile.append([filename, outPath]) - - ## - ## @brief Add a file at the parsing system - ## @param[in] filename File To add at the parsing element system. - ## @return True if no error occured, False otherwise - ## - def add_file(self, filename): - debug.debug("adding file in documantation : '" + filename + "'"); - - parsedFile = Parse.parse_file(filename) - self.structureLib = parsedFile.fusion(self.structureLib) - - return True - - ## - ## @brief Generate Documentation at the folder ... - ## @param[in] destFolder Destination folder. - ## @param[in] mode (optinnal) generation output mode {html, markdown ...} - ## - def generate_documantation(self, target, destFolder, mode="html"): - # local store of the target - self.target = target - if mode == "html": - if lutinDocHtml.generate(self, destFolder) == False: - debug.warning("Generation Documentation :'" + mode + "' ==> return an error for " + self.moduleName) - elif mode == "markdown": - # todo ... - None - else: - debug.error("Unknow Documantation mode generation :'" + mode + "'") - self.target = None - return False - self.target = None - return True - - def get_base_doc_node(self): - return self.structureLib - - ## - ## @brief Get the heritage list (parent) of one element. - ## @param[in] element Element name. - ## @return List of all element herited - ## - def get_heritage_list(self, element): - list = [] - # get element class : - if element in self.listClass.keys(): - localClass = self.listClass[element] - if len(localClass['inherits']) != 0: - # TODO : Support multiple heritage ... - isFirst = True - for heritedClass in localClass['inherits']: - if isFirst == True: - list = self.get_heritage_list(heritedClass['class']) - break; - debug.verbose("find parent : " + element) - list.append(element); - return list - - ## - ## @brief Get the heritage list (child) of this element. - ## @param[in] curentClassName Element name. 
- ## @return List of all childs - ## - def get_down_heritage_list(self, curentClassName): - list = [] - # get element class : - for element in self.listClass: - localClass = self.listClass[element] - if len(localClass['inherits']) != 0: - for heritedClass in localClass['inherits']: - if curentClassName == heritedClass['class']: - list.append(element) - break; - debug.verbose("find childs : " + str(list)) - return list - - ## - ## @brief trnsform the classname in a generic link (HTML) - ## @param[in] elementName Name of the class requested - ## @return [className, link] - ## - def get_class_link(self, elementName): - if elementName == "const" \ - or elementName == "enum" \ - or elementName == "void" \ - or elementName == "char" \ - or elementName == "char32_t" \ - or elementName == "float" \ - or elementName == "double" \ - or elementName == "bool" \ - or elementName == "int8_t" \ - or elementName == "uint8_t" \ - or elementName == "int16_t" \ - or elementName == "uint16_t" \ - or elementName == "int32_t" \ - or elementName == "uint32_t" \ - or elementName == "int64_t" \ - or elementName == "uint64_t" \ - or elementName == "int" \ - or elementName == "T" \ - or elementName == "CLASS_TYPE" \ - or elementName[:5] == "std::" \ - or elementName[:6] == "appl::" \ - or elementName == "&" \ - or elementName == "*" \ - or elementName == "**": - return [elementName, ""] - if elementName in self.listClass.keys(): - link = elementName.replace(":","_") + ".html" - return [elementName, link] - elif elementName in self.listEnum.keys(): - link = elementName.replace(":","_") + ".html" - return [elementName, link] - else: - return self.target.doc_get_link(elementName) - return [elementName, ""] - - ## - ## @brief trnsform the classname in a generic link (HTML) (external access ==> from target) - ## @param[in] elementName Name of the class requested - ## @return [className, link] - ## - def get_class_link_from_target(self, elementName, target): - # reject when auto call : - if self.target != None: - return [elementName, ""] - # search in local list : - if elementName in self.listClass.keys(): - link = elementName.replace(":","_") + ".html" - if target.get_build_mode() == "debug": - return [elementName, "../" + self.moduleName + "/" + link] - elif self.webSite != "": - return [elementName, self.webSite + "/" + link] - elif elementName in self.listEnum.keys(): - link = elementName.replace(":","_") + ".html" - if target.get_build_mode() == "debug": - return [elementName, "../" + self.moduleName + "/" + link] - elif self.webSite != "": - return [elementName, self.webSite + "/" + link] - # did not find : - return [elementName, ""] - - - diff --git a/lutinDocHtml.py b/lutinDocHtml.py deleted file mode 100644 index 4e17ac5..0000000 --- a/lutinDocHtml.py +++ /dev/null @@ -1,627 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -#import CppHeaderParser -import re -import codeBB -import collections - -global_class_link = { - "std::string" : "http://www.cplusplus.com/reference/string/string/", - "std::u16string" : "http://www.cplusplus.com/reference/string/u16string/", - "std::u32string" : "http://www.cplusplus.com/reference/string/u32string/", - "std::wstring" : "http://www.cplusplus.com/reference/string/wstring/", - "std::vector" : "http://www.cplusplus.com/reference/vector/vector/" - } - - -def replace_type(match): - value = "" + match.group() + "" - return value - -def replace_storage_keyword(match): - value = "" + match.group() + "" - return value - -def display_color(valBase): 
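# Descriptive note (behaviour read from the body below): display_color() runs the
# replace_* substitutions defined above over a raw code fragment and returns the
# decorated string together with len(valBase), so that display_type() can keep the
# column alignment of the rendered signature even after extra markup is added.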
- # storage keyword : - p = re.compile("(inline|const|class|virtual|private|public|protected|friend|const|extern|auto|register|static|volatile|typedef|struct|union|enum)") - val = p.sub(replace_storage_keyword, valBase) - # type : - p = re.compile("(bool|BOOL|char(16_t|32_t)?|double|float|u?int(8|16|32|64|128)?(_t)?|long|short|signed|size_t|unsigned|void|(I|U)(8|16|32|64|128))") - val = p.sub(replace_type, val) - return val, len(valBase) - -def display_type(type, myDoc): - type = type.replace("inline ", "") - lenght = 0; - isFirst = True - out = '' - # we split all the element in list sepa=rated with space to keep class... and standard c+ class - for element in type.split(' '): - if isFirst == False: - out += " " - lenght += 1 - isFirst = False - # check if the element in internal at the current lib - name, link = myDoc.get_class_link(element) - if len(link) != 0: - out += "" + name + "" - lenght += len(element) - # Ckeck if the variable in a standard class: - elif element in global_class_link.keys(): - out += "" + element + "" - lenght += len(element) - else: - data, lenghtTmp = display_color(element) - out += data - lenght += lenghtTmp - # get every subelement class : - return [out,lenght] - -def display_doxygen_param(comment, input, output): - data = "Parameter" - if input == True: - data += " [input]" - if output == True: - data += " [output]" - data += ": " - #extract first element: - val = comment.find(" ") - var = comment[:val] - endComment = comment[val:] - # TODO : Check if it exist in the parameter list ... - data += "" + var + " " + endComment - - data += "
    " - return data - - -def parse_doxygen(data) : - pos = data.find("/*"); - if pos > 0: - data = data[pos:] - - if data[0] == '/' \ - and data[1] == '*' \ - and data[2] == '*': - data = data[3:len(data)-2] - data = data.replace("\n** ", "\n") - data = data.replace("\n**", "\n") - data = data.replace("\n* ", "\n") - data = data.replace("\n*", "\n") - data = data.replace("\n ** ", "\n") - data = data.replace("\n **", "\n") - data = data.replace("\n * ", "\n") - data = data.replace("\n *", "\n") - data = data.replace("\r", '') - streams = data.split("@") - data2 = '' - for element in streams: - if element[:1] == "\n" \ - or element[:2] == "\n\n": - # nothing to do : Nomale case of the first \n - None - elif element[:6] == "brief ": - data2 += element[6:] - data2 += "
    " - - for element in streams: - if element[:1] == "\n" \ - or element[:2] == "\n\n": - # nothing to do : Nomale case of the first \n - None - elif element[:5] == "note ": - data2 += "Notes: " - data2 += element[5:] - data2 += "
    " - - data3 = '' - for element in streams: - if element[:1] == "\n" \ - or element[:2] == "\n\n": - # nothing to do : Nomale case of the first \n - None - elif element[:14] == "param[in,out] " \ - or element[:14] == "param[out,in] ": - data3 += display_doxygen_param(element[14:], True, True) - elif element[:10] == "param[in] ": - data3 += display_doxygen_param(element[10:], True, False) - elif element[:11] == "param[out] ": - data3 += display_doxygen_param(element[11:], False, True) - elif element[:6] == "param ": - data3 += display_doxygen_param(element[6:], False, False) - elif element[:7] == "return ": - data3 += "Return: " - data3 += element[7:] - data3 += "
    " - if data3 != '': - data2 += "\n" - return data2 - -def white_space(size) : - ret = '' - for iii in range(len(ret), size): - ret += " " - return ret - -def display_reduct_function(function, file, classement, sizeReturn, sizefunction, myDoc) : - file.write(classement + " ") - lenght = len(classement)+1; - if function['destructor'] : - file.write(white_space(sizeReturn) + "~") - lenght += sizeReturn+1; - elif function['constructor'] : - file.write(white_space(sizeReturn+1)) - lenght += sizeReturn+1; - else : - typeData, typeLen = display_type(function["rtnType"], myDoc); - file.write(typeData) - file.write(white_space(sizeReturn+1 - typeLen)) - lenght += sizeReturn+1; - parameterPos = lenght + sizefunction+2; - file.write("" + function["name"] + "") - file.write(white_space(sizefunction+1 - len(function["name"]))) - file.write("(") - isFirst = True - for param in function["parameters"]: - if isFirst == False: - file.write(",
    ") - file.write(white_space(parameterPos)) - - typeData, typeLen = display_type(param["type"], myDoc); - file.write(typeData) - if param['name'] != "": - file.write(" ") - file.write("" + param['name'] + "") - isFirst = False - file.write(");") - file.write("
    ") - - -def displayFunction(namespace, function, file, classement, sizeReturn, sizefunction, myDoc) : - lineData = "" - if ( function['constructor'] == True \ - or function['destructor'] == True \ - or function['static'] == True ) \ - and namespace != "": - lineData = namespace + "::" - if function['destructor'] : - lineData += "~" - lineData +="" + function["name"] + " ()" - file.write("

    " + lineData + "

    \n\n") - - file.write("
    \n");
    -	if function['destructor'] :
    -		file.write("~")
    -		lenght = 1;
    -	elif function['constructor'] :
    -		lenght = 0;
    -	else :
    -		typeData, typeLen = display_type(function["rtnType"], myDoc);
    -		file.write(typeData + " ")
    -		lenght = typeLen+1;
    -	
    -	parameterPos = lenght + len(function["name"]) + 1;
    -	file.write("" + function["name"] + "(")
    -	isFirst = True
    -	for param in function["parameters"]:
    -		if isFirst == False:
    -			file.write(",\n")
    -			file.write(white_space(parameterPos))
    -		typeData, typeLen = display_type(param["type"], myDoc);
    -		file.write(typeData)
    -		if param['name'] != "":
    -			file.write(" ")
    -			file.write("" + param['name'] + "")
    -		isFirst = False
    -	file.write(");")
    -	file.write("
    \n"); - file.write("
    \n") - if "doxygen" in function: - # TODO : parse doxygen ... - file.write(parse_doxygen(function["doxygen"])) - file.write("
    \n") - - - - - -def calsulateSizeFunction(function, size) : - if len(function["name"]) > size: - return len(function["name"])+1 - return size - -def calsulateSizeReturn(function, size) : - if len(function["rtnType"]) > size: - return len(function["rtnType"])+1 - return size - -def class_name_to_file_name(className): - className = className.replace(":", "_") - className = className.replace(" ", "") - className += ".html" - return className -""" - -""" - -def addElement(elementList, tree): - if elementList[0] in tree.keys(): - tree[elementList[0]] == addElement(elementList[1:], tree[elementList[0]]) - else : - tree[elementList[0]] == elementList[0] - if len(elementList) != 1: - tree[elementList[0]] == addElement(elementList[1:], tree[elementList[0]]) - return tree - -def recursively_default_dict(): - return collections.defaultdict(recursively_default_dict) - -def createTree(list): - output = [] - myTree = recursively_default_dict() - #myTree['a']['b'] = 'c' - for className in sorted(list) : - list = className.split("::") - if len(list)==1: - myTree[list[0]] == className - elif len(list)==2: - myTree[list[0]][list[1]] == className - elif len(list)==3: - myTree[list[0]][list[1]][list[2]] == className - elif len(list)==4: - myTree[list[0]][list[1]][list[2]][list[3]] == className - elif len(list)==5: - myTree[list[0]][list[1]][list[2]][list[3]][list[4]] == className - else: - myTree[list[0]][list[1]][list[2]][list[3]][list[4]][list[5]] == className - - #output.append(className.split("::")) - #myTree = addElement(className.split("::"), myTree) - #debug.info("plop" + str(myTree)) - return myTree - -def addSub(tree, filterSubNamespace=False): - return "" - -def generate_menu(element, namespaceStack=[], level=1): - listBase = element.get_all_sub_type(['namespace']) - if len(listBase) == 0: - return "" - ret = "" - ret += '\n' - return ret - -def generate_html_page_name(element, namespaceStack): - link = "" - for name in namespaceStack: - link += name + "__" - return element.get_node_type() + "_" + link + element.get_name() + '.html' - -def generate_name(element, namespaceStack): - link = "" - for name in namespaceStack: - link += name + "::" - return element.get_node_type() + ": " + link + element.get_name() - - -def generate_link(element, namespaceStack): - return '' + element.get_name() + '' - -def generate_stupid_index_page(outFolder, header, footer, myLutinDoc): - # create index.hml : - filename = outFolder + "/index.html" - lutinTools.CreateDirectoryOfFile(filename); - file = open(filename, "w") - file.write(header) - file.write("

    " + myLutinDoc.get_base_doc_node().get_name() + "

    "); - file.write("
    "); - file.write("TODO : Main page ..."); - file.write("
    "); - file.write("
    "); - file.write(footer) - file.close(); - -def generate_page(outFolder, header, footer, element, namespaceStack=[]): - if element.get_node_type() in ['library', 'application', 'namespace', 'class', 'struct', 'enum', 'union']: - listBase = element.get_all_sub_type(['library', 'application', 'namespace', 'class', 'struct', 'enum', 'union']) - for elem in listBase: - if element.get_node_type() in ['namespace', 'class', 'struct']: - namespaceStack.append(element.get_name()) - generate_page(outFolder, header, footer, elem['node'], namespaceStack) - namespaceStack.pop() - else: - generate_page(outFolder, header, footer, elem['node'], namespaceStack) - - - filename = outFolder + '/' + generate_html_page_name(element, namespaceStack) - lutinTools.CreateDirectoryOfFile(filename); - file = open(filename, "w") - file.write(header) - file.write("

    " + generate_name(element, namespaceStack) + "

    "); - if element.get_node_type() == 'library': - file.write("TODO : the page ..."); - elif element.get_node_type() == 'application': - file.write("TODO : the page ..."); - elif element.get_node_type() == 'namespace': - file.write("TODO : the page ..."); - elif element.get_node_type() == 'class': - file.write("

    Constructor and Destructor:

    \n") - file.write("
    \n");
    -		file.write("
    \n"); - file.write("
    \n") - - file.write("

    Synopsis:

    \n") - file.write("
    \n");
    -		file.write("
    \n"); - file.write("
    \n") - - # display all functions : - file.write("

    Detail:

    \n") - - file.write("TODO : the page ..."); - elif element.get_node_type() == 'struct': - file.write("TODO : the page ..."); - elif element.get_node_type() == 'enum': - file.write("TODO : the page ..."); - elif element.get_node_type() == 'union': - file.write("TODO : the page ..."); - else: - # not in a specific file ... - debug.warning("might not appear here :'" + element.get_node_type() + "' = '" + element.get_name() + "'") - pass - file.write(footer) - file.close(); - - - - - -def generate(myLutinDoc, outFolder) : - myDoc = myLutinDoc.get_base_doc_node() - lutinTools.CopyFile(lutinTools.GetCurrentPath(__file__)+"/theme/base.css", outFolder+"/base.css") - lutinTools.CopyFile(lutinTools.GetCurrentPath(__file__)+"/theme/menu.css", outFolder+"/menu.css") - # create common header - genericHeader = '\n' - genericHeader += '\n' - genericHeader += '\n' - genericHeader += ' \n' - genericHeader += ' ' + myDoc.get_name() + ' Library\n' - genericHeader += ' \n' - genericHeader += ' \n' - genericHeader += '\n' - genericHeader += '\n' - genericHeader += ' \n" - genericHeader += "
    \n" - - genericFooter = "
    \n" - genericFooter += "\n" - genericFooter += "\n" - - # create index.hml : - generate_stupid_index_page(outFolder, genericHeader, genericFooter, myLutinDoc) - - # create the namespace index properties : - generate_page(outFolder, genericHeader, genericFooter, myDoc) - - """ - for className in sorted(myDoc.listClass.iterkeys()) : - localClass = myDoc.listClass[className] - debug.debug(" class: " + className) - classFileName = outFolder + "/" + class_name_to_file_name(className) - # create directory (in case) - lutinTools.CreateDirectoryOfFile(classFileName); - debug.printElement("doc", myDoc.moduleName, "<==", className) - # open the file : - file = open(classFileName, "w") - - file.write(genericHeader) - - file.write("

    Class: " + className + "

    \n") - file.write("
    \n") - # calculate function max size return & function name size: - sizeReturn=0 - sizefunction=0 - for function in localClass["methods"]["public"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - for function in localClass["methods"]["protected"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - for function in localClass["methods"]["private"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - - file.write("

    Constructor and Destructor:

    \n") - file.write("
    \n");
    -		for function in localClass["methods"]["public"]:
    -			if    function['destructor'] \
    -			   or function['constructor'] :
    -				display_reduct_function(function, file, "+ ", sizeReturn, sizefunction, myDoc)
    -		for function in localClass["methods"]["protected"]:
    -			if    function['destructor'] \
    -			   or function['constructor'] :
    -				display_reduct_function(function, file, "# ", sizeReturn, sizefunction, myDoc)
    -		for function in localClass["methods"]["private"]:
    -			if    function['destructor'] \
    -			   or function['constructor'] :
    -				display_reduct_function(function, file, "- ", sizeReturn, sizefunction, myDoc)
    -		
    -		file.write("
    \n"); - - file.write("

    Synopsis:

    \n") - # display all functions : - # TODO: ... - file.write("
    \n");
    -		for function in localClass["methods"]["public"]:
    -			if     not function['destructor'] \
    -			   and not function['constructor'] :
    -				display_reduct_function(function, file, "+ ", sizeReturn, sizefunction, myDoc)
    -		for function in localClass["methods"]["protected"]:
    -			if     not function['destructor'] \
    -			   and not function['constructor'] :
    -				display_reduct_function(function, file, "# ", sizeReturn, sizefunction, myDoc)
    -		for function in localClass["methods"]["private"]:
    -			if     not function['destructor'] \
    -			   and not function['constructor'] :
    -				display_reduct_function(function, file, "- ", sizeReturn, sizefunction, myDoc)
    -		file.write("
    \n"); - file.write("\n") - file.write("\n") - - - heritage = myDoc.get_heritage_list(className) - heritageDown = myDoc.get_down_heritage_list(className) - if len(heritage) > 1 \ - or len(heritageDown) > 0: - file.write("

    Object Hierarchy:

    \n") - file.write("
    \n")
    -			level = 0;
    -			for heritedClass in heritage:
    -				if level != 0:
    -					file.write(white_space(level*4) + "+--> ")
    -				if heritedClass != className:
    -					name, link = myDoc.get_class_link(heritedClass)
    -					file.write("" + name + "\n")
    -				else:
    -					file.write("" + heritedClass + "\n")
    -				level += 1;
    -			for heritedClass in heritageDown:
    -				file.write(white_space(level*4) + "+--> ")
    -				name, link = myDoc.get_class_link(heritedClass)
    -				file.write("" + name + "\n")
    -			file.write("
    \n") - file.write("
    \n") - " "" - file.write("

    Signals:

    \n") - # display all signals : - # TODO: ... - - file.write("

    Configuration:

    \n") - # display all configuration : - # TODO: ... - " "" - - if "doxygen" in localClass: - file.write("

    Description:

    \n") - # display Class description : - file.write(localClass["doxygen"]) - - - file.write("

    Detail:

    \n") - # display all the class internal functions : - for function in localClass["methods"]["public"]: - displayFunction(localClass['namespace'] , function, file, "+ ", sizeReturn, sizefunction, myDoc) - file.write("\n
    \n") - for function in localClass["methods"]["protected"]: - displayFunction(localClass['namespace'] , function, file, "# ", sizeReturn, sizefunction, myDoc) - file.write("\n
    \n") - for function in localClass["methods"]["private"]: - displayFunction(localClass['namespace'] , function, file, "- ", sizeReturn, sizefunction, myDoc) - file.write("\n
    \n") - - file.write(genericFooter) - - file.close() - - for enumName in sorted(myDoc.listEnum.iterkeys()) : - localEnum = myDoc.listEnum[enumName] - debug.debug(" enum: " + enumName) - fileName = outFolder + "/" + class_name_to_file_name(enumName) - # create directory (in case) - lutinTools.CreateDirectoryOfFile(fileName); - debug.printElement("doc", myDoc.moduleName, "<==", enumName) - # open the file : - file = open(fileName, "w") - - file.write(genericHeader) - - file.write("

    Enum: " + enumName + "

    \n") - file.write("
    \n") - file.write("Value :
    \n") - file.write("\n") - - file.write(genericFooter) - - file.close() - """ - - for docInputName,outpath in myLutinDoc.listDocFile : - debug.printElement("doc", myLutinDoc.moduleName, "<==", docInputName) - outputFileName = outFolder + "/" + outpath.replace('/','_') +".html" - debug.debug("output file : " + outputFileName) - lutinTools.CreateDirectoryOfFile(outputFileName) - inData = lutinTools.FileReadData(docInputName) - if inData == "": - continue - outData = genericHeader + codeBB.transcode(inData) + genericFooter - lutinTools.FileWriteData(outputFileName, outData) - - for docInputName,outpath in myLutinDoc.listTutorialFile : - debug.printElement("tutorial", myLutinDoc.moduleName, "<==", docInputName) - outputFileName = outFolder + "/" + outpath+".html" - debug.debug("output file : " + outputFileName) - lutinTools.CreateDirectoryOfFile(outputFileName) - inData = lutinTools.FileReadData(docInputName) - if inData == "": - continue - outData = genericHeader + codeBB.transcode(inData) + genericFooter - lutinTools.FileWriteData(outputFileName, outData) - - - diff --git a/lutinDocMd.py b/lutinDocMd.py deleted file mode 100644 index 0f80178..0000000 --- a/lutinDocMd.py +++ /dev/null @@ -1,278 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -#import CppHeaderParser - -def parse_doxygen(data) : - pos = data.find("/*"); - if pos > 0: - data = data[pos:] - - if data[0] == '/' \ - and data[1] == '*' \ - and data[2] == '*': - data = data[3:len(data)-2] - data = data.replace("\n** ", "\n") - data = data.replace("\n**", "\n") - data = data.replace("\n* ", "\n") - data = data.replace("\n*", "\n") - data = data.replace("\n ** ", "\n") - data = data.replace("\n **", "\n") - data = data.replace("\n * ", "\n") - data = data.replace("\n *", "\n") - data = data.replace("\r", "") - streams = data.split("@") - data2 = "" - for element in streams: - data2 += "\n" - if element[:1] == "\n" \ - or element[:2] == "\n\n": - # nothing to do : Nomale case of the first \n - None - elif element[:6] == "brief ": - data2 += element[6:] - data2 += "\n" - elif element[:5] == "note ": - data2 += "\t" - data2 += "**Notes:** " - data2 += element[5:] - elif element[:14] == "param[in,out] " \ - or element[:14] == "param[out,in] ": - data2 += "\t" - data2 += "**Parameter [input] [output]:** " - data2 += element[14:] - elif element[:10] == "param[in] ": - data2 += "\t" - data2 += "**Parameter [input]:** " - data2 += element[10:] - elif element[:11] == "param[out] ": - data2 += "\t" - data2 += "**Parameter [output]:** " - data2 += element[11:] - elif element[:6] == "param ": - data2 += "\t" - data2 += "**Parameter:** " - data2 += element[6:] - elif element[:7] == "return ": - data2 += "\t" - data2 += "**Return:** " - data2 += element[7:] - else: - data2 += "unknow : '" + element + "'" - return data2 - - - -def writeExpendSize(data, size) : - ret = data - for iii in range(len(ret), size): - ret += " " - return ret - -def displayReductFunction(function, file, classement, sizeReturn, sizefunction) : - lineData = classement + " " - - if function['destructor'] : - lineData += writeExpendSize("", sizeReturn) - lineData += "~" - elif function['constructor'] : - lineData += writeExpendSize("", sizeReturn+1) - else : - lineData += writeExpendSize(function["rtnType"], sizeReturn+1) - - lineData += writeExpendSize(function["name"], sizefunction+1) - lineData += "(" - file.write("\t" + lineData); - parameterPos = len(lineData); - isFirst = True - for param in function["parameters"]: - if isFirst == 
False: - file.write(",\n\t") - file.write(writeExpendSize("",parameterPos)) - file.write(param['type']) - if param['name'] != "": - file.write(" ") - file.write(param['name']) - isFirst = False - file.write(");") - file.write("\n") - - -def displayFunction(namespace, function, file, classement, sizeReturn, sizefunction) : - lineData = "" - if ( function['constructor'] == True \ - or function['destructor'] == True \ - or function['static'] == True ) \ - and namespace != "": - lineData = namespace + "::" - if function['destructor'] : - lineData += "~" - lineData += function["name"] + "( ... )" - file.write("### " + lineData + "\n\n") - - if function['destructor'] : - lineData = "~" - elif function['constructor'] : - lineData = "" - else : - lineData = function["rtnType"] + " " - - lineData += function["name"] - lineData += "(" - file.write("\t" + lineData); - parameterPos = len(lineData); - isFirst = True - for param in function["parameters"]: - if isFirst == False: - file.write(",\n\t") - file.write(writeExpendSize("",parameterPos)) - file.write(param['type']) - if param['name'] != "": - file.write(" ") - file.write(param['name']) - isFirst = False - file.write(");") - file.write("\n\n") - if "doxygen" in function: - # TODO : parse doxygen ... - file.write(parse_doxygen(function["doxygen"])) - file.write("\n") - file.write("\n") - - - - - -def calsulateSizeFunction(function, size) : - if len(function["name"]) > size: - return len(function["name"])+1 - return size - -def calsulateSizeReturn(function, size) : - if len(function["rtnType"]) > size: - return len(function["rtnType"])+1 - return size - -def GenerateDocFile(filename, outFolder) : - try: - metaData = CppHeaderParser.CppHeader(filename) - except CppHeaderParser.CppParseError, e: - debug.error(" can not parse the file: '" + filename + "' error : " + e) - return False - - lutinTools.CreateDirectoryOfFile(outFolder+"/"); - - for element in metaData.classes: - classFileName = outFolder + "/" - localClass = metaData.classes[element] - if localClass['namespace'] == "": - className = localClass['name'] - else: - className = localClass['namespace'] + "::" + localClass['name'] - debug.debug(" class: " + className) - classFileName += className - # Replace all :: with __ - classFileName = classFileName.replace(":", "_") - classFileName = classFileName.replace(" ", "") - classFileName += ".md" - file = open(classFileName, "w") - - file.write(className + "\n") - for iii in range(0,len(className)): - file.write("="); - file.write("\n") - file.write("\n") - # calculate function max size return & function name size: - sizeReturn=0 - sizefunction=0 - for function in localClass["methods"]["public"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - for function in localClass["methods"]["protected"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - for function in localClass["methods"]["private"]: - sizefunction = calsulateSizeFunction(function, sizefunction) - sizeReturn = calsulateSizeReturn(function, sizeReturn) - - file.write("Synopsis:\n") - file.write("---------\n") - file.write("\n") - # display all functions : - # TODO: ... 
- for function in localClass["methods"]["public"]: - displayReductFunction(function, file, "public: ", sizeReturn, sizefunction) - for function in localClass["methods"]["protected"]: - displayReductFunction(function, file, "protected:", sizeReturn, sizefunction) - for function in localClass["methods"]["private"]: - displayReductFunction(function, file, "private: ", sizeReturn, sizefunction) - - file.write("\n") - file.write("\n") - - - if len(localClass['inherits']) != 0: - file.write("Object Hierarchy:\n") - file.write("-----------------\n") - file.write("\n") - for heritedClass in localClass['inherits']: - file.write("\t" + heritedClass['class'] + "\n") - file.write("\t |\n") - file.write("\t +--> " + localClass['name'] + "\n") - file.write("\n") - file.write("\n") - - - """ - file.write("Signals:\n") - file.write("--------\n") - file.write("\n") - # display all signals : - # TODO: ... - - file.write("Configuration:\n") - file.write("--------------\n") - file.write("\n") - # display all configuration : - # TODO: ... - """ - - if "doxygen" in localClass: - file.write("Description:\n") - file.write("------------\n") - file.write("\n") - # display Class description : - file.write(localClass["doxygen"]) - file.write("\n") - file.write("\n") - - - file.write("Detail:\n") - file.write("-------\n") - file.write("\n") - # display all the class internal functions : - for function in localClass["methods"]["public"]: - displayFunction(localClass['namespace'] , function, file, "public: ", sizeReturn, sizefunction) - file.write("\n________________________________________________________________________\n\n") - for function in localClass["methods"]["protected"]: - displayFunction(localClass['namespace'] , function, file, "protected:", sizeReturn, sizefunction) - file.write("\n________________________________________________________________________\n\n") - for function in localClass["methods"]["private"]: - displayFunction(localClass['namespace'] , function, file, "private: ", sizeReturn, sizefunction) - file.write("\n________________________________________________________________________\n\n") - - - - if len(localClass['inherits']) != 0: - for heritedClass in localClass['inherits']: - debug.debug(" heritage : " + str(heritedClass['class'])) - - - file.close() - - -""" - -""" - diff --git a/lutinModule.py b/lutinModule.py index 71c760b..55e7d33 100644 --- a/lutinModule.py +++ b/lutinModule.py @@ -11,7 +11,6 @@ import lutinHeritage as heritage import lutinDepend as dependency import lutinMultiprocess import lutinEnv -import lutinDoc class module: @@ -375,47 +374,6 @@ class module: debug.verbose("Might copy folder : " + element[0] + "==>" + element[1]) lutinTools.CopyAnythingTarget(target, self.originFolder+"/"+element[0],element[1]) - ## - ## @brief Set the documentation availlable for this module - ## - def doc_enable(self): - self.documentation = lutinDoc.doc(self.name) - self.documentation.set_path(lutinTools.GetCurrentPath(self.originFile)) - - ## - ## @brief Create the module documentation: - ## @param[in,out] target target that request generation of the documentation - ## - def doc_parse_code(self, target): - if self.documentation == None: - return False - self.documentation.doc_parse_code() - return True - - ## - ## @brief Generate real documentation files - ## @param[in,out] target target that request generation of the documentation - ## - def doc_generate(self, target): - if self.documentation == None: - return False - # Real creation of the documentation : - 
lutinTools.RemoveFolderAndSubFolder(target.GetDocFolder(self.name)); - self.documentation.generate_documantation(target, target.GetDocFolder(self.name)) - return True - - - ## - ## @brief Get link on a class or an enum in all the subclasses - ## @param[in] name of the class - ## @return [real element name, link on it] - ## - def doc_get_link(self, target, elementName): - if self.documentation == None: - return [elementName, ""] - return self.documentation.get_class_link_from_target(elementName, target); - - # call here to build the module def Build(self, target, packageName): # ckeck if not previously build diff --git a/lutinTarget.py b/lutinTarget.py index 734d3da..7be6e28 100644 --- a/lutinTarget.py +++ b/lutinTarget.py @@ -67,7 +67,6 @@ class Target: self.folder_final="/final/" + typeCompilator self.folder_staging="/staging/" + typeCompilator self.folder_build="/build/" + typeCompilator - self.folder_doc="/doc/" self.folder_bin="/usr/bin" self.folder_lib="/usr/lib" self.folder_data="/usr/share" @@ -210,15 +209,7 @@ class Target: self.LoadIfNeeded(modName) def Build(self, name, packagesName=None): - if name == "doc": - debug.info("Documentation for all") - self.LoadAll() - print 'Doc all modules' - for mod in self.moduleList: - mod.doc_parse_code(self) - for mod in self.moduleList: - mod.doc_generate(self) - elif name == "dump": + if name == "dump": debug.info("dump all") self.LoadAll() print 'Dump all modules properties' @@ -270,25 +261,8 @@ class Target: elif actionName == "build": debug.debug("build module '" + moduleName + "'") return mod.Build(self, None) - elif actionName == "doc": - debug.debug("Create doc module '" + moduleName + "'") - if mod.doc_parse_code(self) == False: - return False - return mod.doc_generate(self) debug.error("not know module name : '" + moduleName + "' to '" + actionName + "' it") - ## - ## @brief Get link on a class or an enum in all the subclasses - ## @param[in] name of the class - ## @return [real element name, link on it] - ## - def doc_get_link(self, elementName): - for mod in self.moduleList: - elementRealName, link = mod.doc_get_link(self, elementName) - if len(link) != 0: - debug.verbose("find the element : " + elementName + " ==> " + link) - return [elementRealName, link] - return [elementName, ""] __startTargetName="lutinTarget" diff --git a/ply/ANNOUNCE b/ply/ANNOUNCE deleted file mode 100644 index bdc1c10..0000000 --- a/ply/ANNOUNCE +++ /dev/null @@ -1,40 +0,0 @@ -February 17, 2011 - - Announcing : PLY-3.4 (Python Lex-Yacc) - - http://www.dabeaz.com/ply - -I'm pleased to announce PLY-3.4--a pure Python implementation of the -common parsing tools lex and yacc. PLY-3.4 is a minor bug fix -release. It supports both Python 2 and Python 3. - -If you are new to PLY, here are a few highlights: - -- PLY is closely modeled after traditional lex/yacc. If you know how - to use these or similar tools in other languages, you will find - PLY to be comparable. - -- PLY provides very extensive error reporting and diagnostic - information to assist in parser construction. The original - implementation was developed for instructional purposes. As - a result, the system tries to identify the most common types - of errors made by novice users. - -- PLY provides full support for empty productions, error recovery, - precedence rules, and ambiguous grammars. - -- Parsing is based on LR-parsing which is fast, memory efficient, - better suited to large grammars, and which has a number of nice - properties when dealing with syntax errors and other parsing - problems. 
Currently, PLY can build its parsing tables using - either SLR or LALR(1) algorithms. - -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply - -PLY is freely available. - -Cheers, - -David Beazley (http://www.dabeaz.com) \ No newline at end of file diff --git a/ply/CHANGES b/ply/CHANGES deleted file mode 100644 index 34bf50f..0000000 --- a/ply/CHANGES +++ /dev/null @@ -1,1093 +0,0 @@ -Version 3.4 ---------------------- -02/17/11: beazley - Minor patch to make cpp.py compatible with Python 3. Note: This - is an experimental file not currently used by the rest of PLY. - -02/17/11: beazley - Fixed setup.py trove classifiers to properly list PLY as - Python 3 compatible. - -01/02/11: beazley - Migration of repository to github. - -Version 3.3 ------------------------------ -08/25/09: beazley - Fixed issue 15 related to the set_lineno() method in yacc. Reported by - mdsherry. - -08/25/09: beazley - Fixed a bug related to regular expression compilation flags not being - properly stored in lextab.py files created by the lexer when running - in optimize mode. Reported by Bruce Frederiksen. - - -Version 3.2 ------------------------------ -03/24/09: beazley - Added an extra check to not print duplicated warning messages - about reduce/reduce conflicts. - -03/24/09: beazley - Switched PLY over to a BSD-license. - -03/23/09: beazley - Performance optimization. Discovered a few places to make - speedups in LR table generation. - -03/23/09: beazley - New warning message. PLY now warns about rules never - reduced due to reduce/reduce conflicts. Suggested by - Bruce Frederiksen. - -03/23/09: beazley - Some clean-up of warning messages related to reduce/reduce errors. - -03/23/09: beazley - Added a new picklefile option to yacc() to write the parsing - tables to a filename using the pickle module. Here is how - it works: - - yacc(picklefile="parsetab.p") - - This option can be used if the normal parsetab.py file is - extremely large. For example, on jython, it is impossible - to read parsing tables if the parsetab.py exceeds a certain - threshold. - - The filename supplied to the picklefile option is opened - relative to the current working directory of the Python - interpreter. If you need to refer to the file elsewhere, - you will need to supply an absolute or relative path. - - For maximum portability, the pickle file is written - using protocol 0. - -03/13/09: beazley - Fixed a bug in parser.out generation where the rule numbers - where off by one. - -03/13/09: beazley - Fixed a string formatting bug with one of the error messages. - Reported by Richard Reitmeyer - -Version 3.1 ------------------------------ -02/28/09: beazley - Fixed broken start argument to yacc(). PLY-3.0 broke this - feature by accident. - -02/28/09: beazley - Fixed debugging output. yacc() no longer reports shift/reduce - or reduce/reduce conflicts if debugging is turned off. This - restores similar behavior in PLY-2.5. Reported by Andrew Waters. - -Version 3.0 ------------------------------ -02/03/09: beazley - Fixed missing lexer attribute on certain tokens when - invoking the parser p_error() function. Reported by - Bart Whiteley. - -02/02/09: beazley - The lex() command now does all error-reporting and diagonistics - using the logging module interface. Pass in a Logger object - using the errorlog parameter to specify a different logger. - -02/02/09: beazley - Refactored ply.lex to use a more object-oriented and organized - approach to collecting lexer information. 
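
A minimal sketch of the errorlog interface described in the 02/02/09 entry
above: build-time diagnostics from lex() are routed through a standard
logging.Logger passed via the errorlog parameter. The token set and the
logger name below are illustrative only, not part of PLY or this changelog.

    import logging
    import ply.lex as lex

    logging.basicConfig(level=logging.WARNING)
    errlog = logging.getLogger("ply.diagnostics")   # any Logger object works

    tokens = ('NUMBER', 'PLUS')

    t_PLUS   = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    # Warnings and errors produced while the lexer is constructed go to
    # errlog instead of being printed directly.
    lexer = lex.lex(errorlog=errlog)
    lexer.input("1 + 2")
    for tok in lexer:
        print(tok)
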
- -02/01/09: beazley - Removed the nowarn option from lex(). All output is controlled - by passing in a logger object. Just pass in a logger with a high - level setting to suppress output. This argument was never - documented to begin with so hopefully no one was relying upon it. - -02/01/09: beazley - Discovered and removed a dead if-statement in the lexer. This - resulted in a 6-7% speedup in lexing when I tested it. - -01/13/09: beazley - Minor change to the procedure for signalling a syntax error in a - production rule. A normal SyntaxError exception should be raised - instead of yacc.SyntaxError. - -01/13/09: beazley - Added a new method p.set_lineno(n,lineno) that can be used to set the - line number of symbol n in grammar rules. This simplifies manual - tracking of line numbers. - -01/11/09: beazley - Vastly improved debugging support for yacc.parse(). Instead of passing - debug as an integer, you can supply a Logging object (see the logging - module). Messages will be generated at the ERROR, INFO, and DEBUG - logging levels, each level providing progressively more information. - The debugging trace also shows states, grammar rule, values passed - into grammar rules, and the result of each reduction. - -01/09/09: beazley - The yacc() command now does all error-reporting and diagnostics using - the interface of the logging module. Use the errorlog parameter to - specify a logging object for error messages. Use the debuglog parameter - to specify a logging object for the 'parser.out' output. - -01/09/09: beazley - *HUGE* refactoring of the the ply.yacc() implementation. The high-level - user interface is backwards compatible, but the internals are completely - reorganized into classes. No more global variables. The internals - are also more extensible. For example, you can use the classes to - construct a LALR(1) parser in an entirely different manner than - what is currently the case. Documentation is forthcoming. - -01/07/09: beazley - Various cleanup and refactoring of yacc internals. - -01/06/09: beazley - Fixed a bug with precedence assignment. yacc was assigning the precedence - each rule based on the left-most token, when in fact, it should have been - using the right-most token. Reported by Bruce Frederiksen. - -11/27/08: beazley - Numerous changes to support Python 3.0 including removal of deprecated - statements (e.g., has_key) and the additional of compatibility code - to emulate features from Python 2 that have been removed, but which - are needed. Fixed the unit testing suite to work with Python 3.0. - The code should be backwards compatible with Python 2. - -11/26/08: beazley - Loosened the rules on what kind of objects can be passed in as the - "module" parameter to lex() and yacc(). Previously, you could only use - a module or an instance. Now, PLY just uses dir() to get a list of - symbols on whatever the object is without regard for its type. - -11/26/08: beazley - Changed all except: statements to be compatible with Python2.x/3.x syntax. - -11/26/08: beazley - Changed all raise Exception, value statements to raise Exception(value) for - forward compatibility. - -11/26/08: beazley - Removed all print statements from lex and yacc, using sys.stdout and sys.stderr - directly. Preparation for Python 3.0 support. - -11/04/08: beazley - Fixed a bug with referring to symbols on the the parsing stack using negative - indices. - -05/29/08: beazley - Completely revamped the testing system to use the unittest module for everything. 
- Added additional tests to cover new errors/warnings. - -Version 2.5 ------------------------------ -05/28/08: beazley - Fixed a bug with writing lex-tables in optimized mode and start states. - Reported by Kevin Henry. - -Version 2.4 ------------------------------ -05/04/08: beazley - A version number is now embedded in the table file signature so that - yacc can more gracefully accomodate changes to the output format - in the future. - -05/04/08: beazley - Removed undocumented .pushback() method on grammar productions. I'm - not sure this ever worked and can't recall ever using it. Might have - been an abandoned idea that never really got fleshed out. This - feature was never described or tested so removing it is hopefully - harmless. - -05/04/08: beazley - Added extra error checking to yacc() to detect precedence rules defined - for undefined terminal symbols. This allows yacc() to detect a potential - problem that can be really tricky to debug if no warning message or error - message is generated about it. - -05/04/08: beazley - lex() now has an outputdir that can specify the output directory for - tables when running in optimize mode. For example: - - lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar") - - The behavior of specifying a table module and output directory are - more aligned with the behavior of yacc(). - -05/04/08: beazley - [Issue 9] - Fixed filename bug in when specifying the modulename in lex() and yacc(). - If you specified options such as the following: - - parser = yacc.yacc(tabmodule="foo.bar.parsetab",outputdir="foo/bar") - - yacc would create a file "foo.bar.parsetab.py" in the given directory. - Now, it simply generates a file "parsetab.py" in that directory. - Bug reported by cptbinho. - -05/04/08: beazley - Slight modification to lex() and yacc() to allow their table files - to be loaded from a previously loaded module. This might make - it easier to load the parsing tables from a complicated package - structure. For example: - - import foo.bar.spam.parsetab as parsetab - parser = yacc.yacc(tabmodule=parsetab) - - Note: lex and yacc will never regenerate the table file if used - in the form---you will get a warning message instead. - This idea suggested by Brian Clapper. - - -04/28/08: beazley - Fixed a big with p_error() functions being picked up correctly - when running in yacc(optimize=1) mode. Patch contributed by - Bart Whiteley. - -02/28/08: beazley - Fixed a bug with 'nonassoc' precedence rules. Basically the - non-precedence was being ignored and not producing the correct - run-time behavior in the parser. - -02/16/08: beazley - Slight relaxation of what the input() method to a lexer will - accept as a string. Instead of testing the input to see - if the input is a string or unicode string, it checks to see - if the input object looks like it contains string data. - This change makes it possible to pass string-like objects - in as input. For example, the object returned by mmap. - - import mmap, os - data = mmap.mmap(os.open(filename,os.O_RDONLY), - os.path.getsize(filename), - access=mmap.ACCESS_READ) - lexer.input(data) - - -11/29/07: beazley - Modification of ply.lex to allow token functions to aliased. - This is subtle, but it makes it easier to create libraries and - to reuse token specifications. 
For example, suppose you defined - a function like this: - - def number(t): - r'\d+' - t.value = int(t.value) - return t - - This change would allow you to define a token rule as follows: - - t_NUMBER = number - - In this case, the token type will be set to 'NUMBER' and use - the associated number() function to process tokens. - -11/28/07: beazley - Slight modification to lex and yacc to grab symbols from both - the local and global dictionaries of the caller. This - modification allows lexers and parsers to be defined using - inner functions and closures. - -11/28/07: beazley - Performance optimization: The lexer.lexmatch and t.lexer - attributes are no longer set for lexer tokens that are not - defined by functions. The only normal use of these attributes - would be in lexer rules that need to perform some kind of - special processing. Thus, it doesn't make any sense to set - them on every token. - - *** POTENTIAL INCOMPATIBILITY *** This might break code - that is mucking around with internal lexer state in some - sort of magical way. - -11/27/07: beazley - Added the ability to put the parser into error-handling mode - from within a normal production. To do this, simply raise - a yacc.SyntaxError exception like this: - - def p_some_production(p): - 'some_production : prod1 prod2' - ... - raise yacc.SyntaxError # Signal an error - - A number of things happen after this occurs: - - - The last symbol shifted onto the symbol stack is discarded - and parser state backed up to what it was before the - the rule reduction. - - - The current lookahead symbol is saved and replaced by - the 'error' symbol. - - - The parser enters error recovery mode where it tries - to either reduce the 'error' rule or it starts - discarding items off of the stack until the parser - resets. - - When an error is manually set, the parser does *not* call - the p_error() function (if any is defined). - *** NEW FEATURE *** Suggested on the mailing list - -11/27/07: beazley - Fixed structure bug in examples/ansic. Reported by Dion Blazakis. - -11/27/07: beazley - Fixed a bug in the lexer related to start conditions and ignored - token rules. If a rule was defined that changed state, but - returned no token, the lexer could be left in an inconsistent - state. Reported by - -11/27/07: beazley - Modified setup.py to support Python Eggs. Patch contributed by - Simon Cross. - -11/09/07: beazely - Fixed a bug in error handling in yacc. If a syntax error occurred and the - parser rolled the entire parse stack back, the parser would be left in in - inconsistent state that would cause it to trigger incorrect actions on - subsequent input. Reported by Ton Biegstraaten, Justin King, and others. - -11/09/07: beazley - Fixed a bug when passing empty input strings to yacc.parse(). This - would result in an error message about "No input given". Reported - by Andrew Dalke. - -Version 2.3 ------------------------------ -02/20/07: beazley - Fixed a bug with character literals if the literal '.' appeared as the - last symbol of a grammar rule. Reported by Ales Smrcka. - -02/19/07: beazley - Warning messages are now redirected to stderr instead of being printed - to standard output. - -02/19/07: beazley - Added a warning message to lex.py if it detects a literal backslash - character inside the t_ignore declaration. This is to help - problems that might occur if someone accidentally defines t_ignore - as a Python raw string. 
For example: - - t_ignore = r' \t' - - The idea for this is from an email I received from David Cimimi who - reported bizarre behavior in lexing as a result of defining t_ignore - as a raw string by accident. - -02/18/07: beazley - Performance improvements. Made some changes to the internal - table organization and LR parser to improve parsing performance. - -02/18/07: beazley - Automatic tracking of line number and position information must now be - enabled by a special flag to parse(). For example: - - yacc.parse(data,tracking=True) - - In many applications, it's just not that important to have the - parser automatically track all line numbers. By making this an - optional feature, it allows the parser to run significantly faster - (more than a 20% speed increase in many cases). Note: positional - information is always available for raw tokens---this change only - applies to positional information associated with nonterminal - grammar symbols. - *** POTENTIAL INCOMPATIBILITY *** - -02/18/07: beazley - Yacc no longer supports extended slices of grammar productions. - However, it does support regular slices. For example: - - def p_foo(p): - '''foo: a b c d e''' - p[0] = p[1:3] - - This change is a performance improvement to the parser--it streamlines - normal access to the grammar values since slices are now handled in - a __getslice__() method as opposed to __getitem__(). - -02/12/07: beazley - Fixed a bug in the handling of token names when combined with - start conditions. Bug reported by Todd O'Bryan. - -Version 2.2 ------------------------------- -11/01/06: beazley - Added lexpos() and lexspan() methods to grammar symbols. These - mirror the same functionality of lineno() and linespan(). For - example: - - def p_expr(p): - 'expr : expr PLUS expr' - p.lexpos(1) # Lexing position of left-hand-expression - p.lexpos(1) # Lexing position of PLUS - start,end = p.lexspan(3) # Lexing range of right hand expression - -11/01/06: beazley - Minor change to error handling. The recommended way to skip characters - in the input is to use t.lexer.skip() as shown here: - - def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - The old approach of just using t.skip(1) will still work, but won't - be documented. - -10/31/06: beazley - Discarded tokens can now be specified as simple strings instead of - functions. To do this, simply include the text "ignore_" in the - token declaration. For example: - - t_ignore_cppcomment = r'//.*' - - Previously, this had to be done with a function. For example: - - def t_ignore_cppcomment(t): - r'//.*' - pass - - If start conditions/states are being used, state names should appear - before the "ignore_" text. - -10/19/06: beazley - The Lex module now provides support for flex-style start conditions - as described at http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html. - Please refer to this document to understand this change note. Refer to - the PLY documentation for PLY-specific explanation of how this works. - - To use start conditions, you first need to declare a set of states in - your lexer file: - - states = ( - ('foo','exclusive'), - ('bar','inclusive') - ) - - This serves the same role as the %s and %x specifiers in flex. - - One a state has been declared, tokens for that state can be - declared by defining rules of the form t_state_TOK. 
For example: - - t_PLUS = '\+' # Rule defined in INITIAL state - t_foo_NUM = '\d+' # Rule defined in foo state - t_bar_NUM = '\d+' # Rule defined in bar state - - t_foo_bar_NUM = '\d+' # Rule defined in both foo and bar - t_ANY_NUM = '\d+' # Rule defined in all states - - In addition to defining tokens for each state, the t_ignore and t_error - specifications can be customized for specific states. For example: - - t_foo_ignore = " " # Ignored characters for foo state - def t_bar_error(t): - # Handle errors in bar state - - With token rules, the following methods can be used to change states - - def t_TOKNAME(t): - t.lexer.begin('foo') # Begin state 'foo' - t.lexer.push_state('foo') # Begin state 'foo', push old state - # onto a stack - t.lexer.pop_state() # Restore previous state - t.lexer.current_state() # Returns name of current state - - These methods mirror the BEGIN(), yy_push_state(), yy_pop_state(), and - yy_top_state() functions in flex. - - The use of start states can be used as one way to write sub-lexers. - For example, the lexer or parser might instruct the lexer to start - generating a different set of tokens depending on the context. - - example/yply/ylex.py shows the use of start states to grab C/C++ - code fragments out of traditional yacc specification files. - - *** NEW FEATURE *** Suggested by Daniel Larraz with whom I also - discussed various aspects of the design. - -10/19/06: beazley - Minor change to the way in which yacc.py was reporting shift/reduce - conflicts. Although the underlying LALR(1) algorithm was correct, - PLY was under-reporting the number of conflicts compared to yacc/bison - when precedence rules were in effect. This change should make PLY - report the same number of conflicts as yacc. - -10/19/06: beazley - Modified yacc so that grammar rules could also include the '-' - character. For example: - - def p_expr_list(p): - 'expression-list : expression-list expression' - - Suggested by Oldrich Jedlicka. - -10/18/06: beazley - Attribute lexer.lexmatch added so that token rules can access the re - match object that was generated. For example: - - def t_FOO(t): - r'some regex' - m = t.lexer.lexmatch - # Do something with m - - - This may be useful if you want to access named groups specified within - the regex for a specific token. Suggested by Oldrich Jedlicka. - -10/16/06: beazley - Changed the error message that results if an illegal character - is encountered and no default error function is defined in lex. - The exception is now more informative about the actual cause of - the error. - -Version 2.1 ------------------------------- -10/02/06: beazley - The last Lexer object built by lex() can be found in lex.lexer. - The last Parser object built by yacc() can be found in yacc.parser. - -10/02/06: beazley - New example added: examples/yply - - This example uses PLY to convert Unix-yacc specification files to - PLY programs with the same grammar. This may be useful if you - want to convert a grammar from bison/yacc to use with PLY. - -10/02/06: beazley - Added support for a start symbol to be specified in the yacc - input file itself. Just do this: - - start = 'name' - - where 'name' matches some grammar rule. For example: - - def p_name(p): - 'name : A B C' - ... - - This mirrors the functionality of the yacc %start specifier. - -09/30/06: beazley - Some new examples added.: - - examples/GardenSnake : A simple indentation based language similar - to Python. Shows how you might handle - whitespace. Contributed by Andrew Dalke. 
- - examples/BASIC : An implementation of 1964 Dartmouth BASIC. - Contributed by Dave against his better - judgement. - -09/28/06: beazley - Minor patch to allow named groups to be used in lex regular - expression rules. For example: - - t_QSTRING = r'''(?P['"]).*?(?P=quote)''' - - Patch submitted by Adam Ring. - -09/28/06: beazley - LALR(1) is now the default parsing method. To use SLR, use - yacc.yacc(method="SLR"). Note: there is no performance impact - on parsing when using LALR(1) instead of SLR. However, constructing - the parsing tables will take a little longer. - -09/26/06: beazley - Change to line number tracking. To modify line numbers, modify - the line number of the lexer itself. For example: - - def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - - This modification is both cleanup and a performance optimization. - In past versions, lex was monitoring every token for changes in - the line number. This extra processing is unnecessary for a vast - majority of tokens. Thus, this new approach cleans it up a bit. - - *** POTENTIAL INCOMPATIBILITY *** - You will need to change code in your lexer that updates the line - number. For example, "t.lineno += 1" becomes "t.lexer.lineno += 1" - -09/26/06: beazley - Added the lexing position to tokens as an attribute lexpos. This - is the raw index into the input text at which a token appears. - This information can be used to compute column numbers and other - details (e.g., scan backwards from lexpos to the first newline - to get a column position). - -09/25/06: beazley - Changed the name of the __copy__() method on the Lexer class - to clone(). This is used to clone a Lexer object (e.g., if - you're running different lexers at the same time). - -09/21/06: beazley - Limitations related to the use of the re module have been eliminated. - Several users reported problems with regular expressions exceeding - more than 100 named groups. To solve this, lex.py is now capable - of automatically splitting its master regular regular expression into - smaller expressions as needed. This should, in theory, make it - possible to specify an arbitrarily large number of tokens. - -09/21/06: beazley - Improved error checking in lex.py. Rules that match the empty string - are now rejected (otherwise they cause the lexer to enter an infinite - loop). An extra check for rules containing '#' has also been added. - Since lex compiles regular expressions in verbose mode, '#' is interpreted - as a regex comment, it is critical to use '\#' instead. - -09/18/06: beazley - Added a @TOKEN decorator function to lex.py that can be used to - define token rules where the documentation string might be computed - in some way. - - digit = r'([0-9])' - nondigit = r'([_A-Za-z])' - identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)' - - from ply.lex import TOKEN - - @TOKEN(identifier) - def t_ID(t): - # Do whatever - - The @TOKEN decorator merely sets the documentation string of the - associated token function as needed for lex to work. - - Note: An alternative solution is the following: - - def t_ID(t): - # Do whatever - - t_ID.__doc__ = identifier - - Note: Decorators require the use of Python 2.4 or later. If compatibility - with old versions is needed, use the latter solution. - - The need for this feature was suggested by Cem Karan. - -09/14/06: beazley - Support for single-character literal tokens has been added to yacc. - These literals must be enclosed in quotes. For example: - - def p_expr(p): - "expr : expr '+' expr" - ... 
- - def p_expr(p): - 'expr : expr "-" expr' - ... - - In addition to this, it is necessary to tell the lexer module about - literal characters. This is done by defining the variable 'literals' - as a list of characters. This should be defined in the module that - invokes the lex.lex() function. For example: - - literals = ['+','-','*','/','(',')','='] - - or simply - - literals = '+=*/()=' - - It is important to note that literals can only be a single character. - When the lexer fails to match a token using its normal regular expression - rules, it will check the current character against the literal list. - If found, it will be returned with a token type set to match the literal - character. Otherwise, an illegal character will be signalled. - - -09/14/06: beazley - Modified PLY to install itself as a proper Python package called 'ply'. - This will make it a little more friendly to other modules. This - changes the usage of PLY only slightly. Just do this to import the - modules - - import ply.lex as lex - import ply.yacc as yacc - - Alternatively, you can do this: - - from ply import * - - Which imports both the lex and yacc modules. - Change suggested by Lee June. - -09/13/06: beazley - Changed the handling of negative indices when used in production rules. - A negative production index now accesses already parsed symbols on the - parsing stack. For example, - - def p_foo(p): - "foo: A B C D" - print p[1] # Value of 'A' symbol - print p[2] # Value of 'B' symbol - print p[-1] # Value of whatever symbol appears before A - # on the parsing stack. - - p[0] = some_val # Sets the value of the 'foo' grammer symbol - - This behavior makes it easier to work with embedded actions within the - parsing rules. For example, in C-yacc, it is possible to write code like - this: - - bar: A { printf("seen an A = %d\n", $1); } B { do_stuff; } - - In this example, the printf() code executes immediately after A has been - parsed. Within the embedded action code, $1 refers to the A symbol on - the stack. - - To perform this equivalent action in PLY, you need to write a pair - of rules like this: - - def p_bar(p): - "bar : A seen_A B" - do_stuff - - def p_seen_A(p): - "seen_A :" - print "seen an A =", p[-1] - - The second rule "seen_A" is merely a empty production which should be - reduced as soon as A is parsed in the "bar" rule above. The use - of the negative index p[-1] is used to access whatever symbol appeared - before the seen_A symbol. - - This feature also makes it possible to support inherited attributes. - For example: - - def p_decl(p): - "decl : scope name" - - def p_scope(p): - """scope : GLOBAL - | LOCAL""" - p[0] = p[1] - - def p_name(p): - "name : ID" - if p[-1] == "GLOBAL": - # ... - else if p[-1] == "LOCAL": - #... - - In this case, the name rule is inheriting an attribute from the - scope declaration that precedes it. - - *** POTENTIAL INCOMPATIBILITY *** - If you are currently using negative indices within existing grammar rules, - your code will break. This should be extremely rare if non-existent in - most cases. The argument to various grammar rules is not usually not - processed in the same way as a list of items. - -Version 2.0 ------------------------------- -09/07/06: beazley - Major cleanup and refactoring of the LR table generation code. Both SLR - and LALR(1) table generation is now performed by the same code base with - only minor extensions for extra LALR(1) processing. 
- -09/07/06: beazley - Completely reimplemented the entire LALR(1) parsing engine to use the - DeRemer and Pennello algorithm for calculating lookahead sets. This - significantly improves the performance of generating LALR(1) tables - and has the added feature of actually working correctly! If you - experienced weird behavior with LALR(1) in prior releases, this should - hopefully resolve all of those problems. Many thanks to - Andrew Waters and Markus Schoepflin for submitting bug reports - and helping me test out the revised LALR(1) support. - -Version 1.8 ------------------------------- -08/02/06: beazley - Fixed a problem related to the handling of default actions in LALR(1) - parsing. If you experienced subtle and/or bizarre behavior when trying - to use the LALR(1) engine, this may correct those problems. Patch - contributed by Russ Cox. Note: This patch has been superceded by - revisions for LALR(1) parsing in Ply-2.0. - -08/02/06: beazley - Added support for slicing of productions in yacc. - Patch contributed by Patrick Mezard. - -Version 1.7 ------------------------------- -03/02/06: beazley - Fixed infinite recursion problem ReduceToTerminals() function that - would sometimes come up in LALR(1) table generation. Reported by - Markus Schoepflin. - -03/01/06: beazley - Added "reflags" argument to lex(). For example: - - lex.lex(reflags=re.UNICODE) - - This can be used to specify optional flags to the re.compile() function - used inside the lexer. This may be necessary for special situations such - as processing Unicode (e.g., if you want escapes like \w and \b to consult - the Unicode character property database). The need for this suggested by - Andreas Jung. - -03/01/06: beazley - Fixed a bug with an uninitialized variable on repeated instantiations of parser - objects when the write_tables=0 argument was used. Reported by Michael Brown. - -03/01/06: beazley - Modified lex.py to accept Unicode strings both as the regular expressions for - tokens and as input. Hopefully this is the only change needed for Unicode support. - Patch contributed by Johan Dahl. - -03/01/06: beazley - Modified the class-based interface to work with new-style or old-style classes. - Patch contributed by Michael Brown (although I tweaked it slightly so it would work - with older versions of Python). - -Version 1.6 ------------------------------- -05/27/05: beazley - Incorporated patch contributed by Christopher Stawarz to fix an extremely - devious bug in LALR(1) parser generation. This patch should fix problems - numerous people reported with LALR parsing. - -05/27/05: beazley - Fixed problem with lex.py copy constructor. Reported by Dave Aitel, Aaron Lav, - and Thad Austin. - -05/27/05: beazley - Added outputdir option to yacc() to control output directory. Contributed - by Christopher Stawarz. - -05/27/05: beazley - Added rununit.py test script to run tests using the Python unittest module. - Contributed by Miki Tebeka. - -Version 1.5 ------------------------------- -05/26/04: beazley - Major enhancement. LALR(1) parsing support is now working. - This feature was implemented by Elias Ioup (ezioup@alumni.uchicago.edu) - and optimized by David Beazley. To use LALR(1) parsing do - the following: - - yacc.yacc(method="LALR") - - Computing LALR(1) parsing tables takes about twice as long as - the default SLR method. However, LALR(1) allows you to handle - more complex grammars. 
For example, the ANSI C grammar - (in example/ansic) has 13 shift-reduce conflicts with SLR, but - only has 1 shift-reduce conflict with LALR(1). - -05/20/04: beazley - Added a __len__ method to parser production lists. Can - be used in parser rules like this: - - def p_somerule(p): - """a : B C D - | E F" - if (len(p) == 3): - # Must have been first rule - elif (len(p) == 2): - # Must be second rule - - Suggested by Joshua Gerth and others. - -Version 1.4 ------------------------------- -04/23/04: beazley - Incorporated a variety of patches contributed by Eric Raymond. - These include: - - 0. Cleans up some comments so they don't wrap on an 80-column display. - 1. Directs compiler errors to stderr where they belong. - 2. Implements and documents automatic line counting when \n is ignored. - 3. Changes the way progress messages are dumped when debugging is on. - The new format is both less verbose and conveys more information than - the old, including shift and reduce actions. - -04/23/04: beazley - Added a Python setup.py file to simply installation. Contributed - by Adam Kerrison. - -04/23/04: beazley - Added patches contributed by Adam Kerrison. - - - Some output is now only shown when debugging is enabled. This - means that PLY will be completely silent when not in debugging mode. - - - An optional parameter "write_tables" can be passed to yacc() to - control whether or not parsing tables are written. By default, - it is true, but it can be turned off if you don't want the yacc - table file. Note: disabling this will cause yacc() to regenerate - the parsing table each time. - -04/23/04: beazley - Added patches contributed by David McNab. This patch addes two - features: - - - The parser can be supplied as a class instead of a module. - For an example of this, see the example/classcalc directory. - - - Debugging output can be directed to a filename of the user's - choice. Use - - yacc(debugfile="somefile.out") - - -Version 1.3 ------------------------------- -12/10/02: jmdyck - Various minor adjustments to the code that Dave checked in today. - Updated test/yacc_{inf,unused}.exp to reflect today's changes. - -12/10/02: beazley - Incorporated a variety of minor bug fixes to empty production - handling and infinite recursion checking. Contributed by - Michael Dyck. - -12/10/02: beazley - Removed bogus recover() method call in yacc.restart() - -Version 1.2 ------------------------------- -11/27/02: beazley - Lexer and parser objects are now available as an attribute - of tokens and slices respectively. For example: - - def t_NUMBER(t): - r'\d+' - print t.lexer - - def p_expr_plus(t): - 'expr: expr PLUS expr' - print t.lexer - print t.parser - - This can be used for state management (if needed). - -10/31/02: beazley - Modified yacc.py to work with Python optimize mode. To make - this work, you need to use - - yacc.yacc(optimize=1) - - Furthermore, you need to first run Python in normal mode - to generate the necessary parsetab.py files. After that, - you can use python -O or python -OO. - - Note: optimized mode turns off a lot of error checking. - Only use when you are sure that your grammar is working. - Make sure parsetab.py is up to date! - -10/30/02: beazley - Added cloning of Lexer objects. For example: - - import copy - l = lex.lex() - lc = copy.copy(l) - - l.input("Some text") - lc.input("Some other text") - ... - - This might be useful if the same "lexer" is meant to - be used in different contexts---or if multiple lexers - are running concurrently. 
- -10/30/02: beazley - Fixed subtle bug with first set computation and empty productions. - Patch submitted by Michael Dyck. - -10/30/02: beazley - Fixed error messages to use "filename:line: message" instead - of "filename:line. message". This makes error reporting more - friendly to emacs. Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file. Terminals and nonterminals - are sorted instead of being printed in random order. - Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file output. Rules are now printed - in a way that's easier to understand. Contributed by Russ Cox. - -10/30/02: beazley - Added 'nonassoc' associativity support. This can be used - to disable the chaining of operators like a < b < c. - To use, simply specify 'nonassoc' in the precedence table - - precedence = ( - ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator - ) - - Patch contributed by Russ Cox. - -10/30/02: beazley - Modified the lexer to provide optional support for Python -O and -OO - modes. To make this work, Python *first* needs to be run in - unoptimized mode. This reads the lexing information and creates a - file "lextab.py". Then, run lex like this: - - # module foo.py - ... - ... - lex.lex(optimize=1) - - Once the lextab file has been created, subsequent calls to - lex.lex() will read data from the lextab file instead of using - introspection. In optimized mode (-O, -OO) everything should - work normally despite the loss of doc strings. - - To change the name of the file 'lextab.py' use the following: - - lex.lex(lextab="footab") - - (this creates a file footab.py) - - -Version 1.1 October 25, 2001 ------------------------------- - -10/25/01: beazley - Modified the table generator to produce much more compact data. - This should greatly reduce the size of the parsetab.py[c] file. - Caveat: the tables still need to be constructed so a little more - work is done in parsetab on import. - -10/25/01: beazley - There may be a possible bug in the cycle detector that reports errors - about infinite recursion. I'm having a little trouble tracking it - down, but if you get this problem, you can disable the cycle - detector as follows: - - yacc.yacc(check_recursion = 0) - -10/25/01: beazley - Fixed a bug in lex.py that sometimes caused illegal characters to be - reported incorrectly. Reported by Sverre Jørgensen. - -7/8/01 : beazley - Added a reference to the underlying lexer object when tokens are handled by - functions. The lexer is available as the 'lexer' attribute. This - was added to provide better lexing support for languages such as Fortran - where certain types of tokens can't be conveniently expressed as regular - expressions (and where the tokenizing function may want to perform a - little backtracking). Suggested by Pearu Peterson. - -6/20/01 : beazley - Modified yacc() function so that an optional starting symbol can be specified. - For example: - - yacc.yacc(start="statement") - - Normally yacc always treats the first production rule as the starting symbol. - However, if you are debugging your grammar it may be useful to specify - an alternative starting symbol. Idea suggested by Rich Salz. 
- -Version 1.0 June 18, 2001 --------------------------- -Initial public offering - diff --git a/ply/PKG-INFO b/ply/PKG-INFO deleted file mode 100644 index 0080e02..0000000 --- a/ply/PKG-INFO +++ /dev/null @@ -1,22 +0,0 @@ -Metadata-Version: 1.0 -Name: ply -Version: 3.4 -Summary: Python Lex & Yacc -Home-page: http://www.dabeaz.com/ply/ -Author: David Beazley -Author-email: dave@dabeaz.com -License: BSD -Description: - PLY is yet another implementation of lex and yacc for Python. Some notable - features include the fact that its implemented entirely in Python and it - uses LALR(1) parsing which is efficient and well suited for larger grammars. - - PLY provides most of the standard lex/yacc features including support for empty - productions, precedence rules, error recovery, and support for ambiguous grammars. - - PLY is extremely easy to use and provides very extensive error checking. - It is compatible with both Python 2 and Python 3. - -Platform: UNKNOWN -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 2 diff --git a/ply/README b/ply/README deleted file mode 100644 index f384d1a..0000000 --- a/ply/README +++ /dev/null @@ -1,271 +0,0 @@ -PLY (Python Lex-Yacc) Version 3.4 - -Copyright (C) 2001-2011, -David M. Beazley (Dabeaz LLC) -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -* Neither the name of the David Beazley or Dabeaz LLC may be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Introduction -============ - -PLY is a 100% Python implementation of the common parsing tools lex -and yacc. Here are a few highlights: - - - PLY is very closely modeled after traditional lex/yacc. - If you know how to use these tools in C, you will find PLY - to be similar. - - - PLY provides *very* extensive error reporting and diagnostic - information to assist in parser construction. The original - implementation was developed for instructional purposes. As - a result, the system tries to identify the most common types - of errors made by novice users. - - - PLY provides full support for empty productions, error recovery, - precedence specifiers, and moderately ambiguous grammars. 
- - - Parsing is based on LR-parsing which is fast, memory efficient, - better suited to large grammars, and which has a number of nice - properties when dealing with syntax errors and other parsing problems. - Currently, PLY builds its parsing tables using the LALR(1) - algorithm used in yacc. - - - PLY uses Python introspection features to build lexers and parsers. - This greatly simplifies the task of parser construction since it reduces - the number of files and eliminates the need to run a separate lex/yacc - tool before running your program. - - - PLY can be used to build parsers for "real" programming languages. - Although it is not ultra-fast due to its Python implementation, - PLY can be used to parse grammars consisting of several hundred - rules (as might be found for a language like C). The lexer and LR - parser are also reasonably efficient when parsing typically - sized programs. People have used PLY to build parsers for - C, C++, ADA, and other real programming languages. - -How to Use -========== - -PLY consists of two files : lex.py and yacc.py. These are contained -within the 'ply' directory which may also be used as a Python package. -To use PLY, simply copy the 'ply' directory to your project and import -lex and yacc from the associated 'ply' package. For example: - - import ply.lex as lex - import ply.yacc as yacc - -Alternatively, you can copy just the files lex.py and yacc.py -individually and use them as modules. For example: - - import lex - import yacc - -The file setup.py can be used to install ply using distutils. - -The file doc/ply.html contains complete documentation on how to use -the system. - -The example directory contains several different examples including a -PLY specification for ANSI C as given in K&R 2nd Ed. - -A simple example is found at the end of this document - -Requirements -============ -PLY requires the use of Python 2.2 or greater. However, you should -use the latest Python release if possible. It should work on just -about any platform. PLY has been tested with both CPython and Jython. -It also seems to work with IronPython. - -Resources -========= -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply - -For a detailed overview of parsing theory, consult the excellent -book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and -Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown -may also be useful. - -A Google group for PLY can be found at - - http://groups.google.com/group/ply-hack - -Acknowledgments -=============== -A special thanks is in order for all of the students in CS326 who -suffered through about 25 different versions of these tools :-). - -The CHANGES file acknowledges those who have contributed patches. - -Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x. -Andrew Waters and Markus Schoepflin were instrumental in reporting bugs -and testing a revised LALR(1) implementation for PLY-2.0. - -Special Note for PLY-3.0 -======================== -PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual -Python 2/3 compatibility by restricting its implementation to a common -subset of basic language features. You should not convert PLY using -2to3--it is not necessary and may in fact break the implementation. - -Example -======= - -Here is a simple example showing a PLY implementation of a calculator -with variables. 
- -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Ignored characters -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names (for storing variables) -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - print("Syntax error at '%s'" % p.value) - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') # use input() on Python 3 - except EOFError: - break - yacc.parse(s) - - -Bug Reports and Patches -======================= -My goal with PLY is to simply have a decent lex/yacc implementation -for Python. As a general rule, I don't spend huge amounts of time -working on it unless I receive very specific bug reports and/or -patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack - --- Dave - - - - - - - - - diff --git a/ply/TODO b/ply/TODO deleted file mode 100644 index f4800aa..0000000 --- a/ply/TODO +++ /dev/null @@ -1,16 +0,0 @@ -The PLY to-do list: - -1. Finish writing the C Preprocessor module. Started in the - file ply/cpp.py - -2. Create and document libraries of useful tokens. - -3. Expand the examples/yply tool that parses bison/yacc - files. - -4. Think of various diabolical things to do with the - new yacc internals. For example, it is now possible - to specify grammrs using completely different schemes - than the reflection approach used by PLY. - - diff --git a/ply/doc/internal.html b/ply/doc/internal.html deleted file mode 100644 index 3fabfe2..0000000 --- a/ply/doc/internal.html +++ /dev/null @@ -1,874 +0,0 @@ - - -PLY Internals - - - -

    PLY Internals

    - - -David M. Beazley
    -dave@dabeaz.com
    -
    - -

    -PLY Version: 3.0 -

    - - -

    - - - -

    1. Introduction

- - -This document describes classes and functions that make up the internal -operation of PLY. Using this programming interface, it is possible to -manually build a parser using a different interface specification -than what PLY normally uses. For example, you could build a grammar -from information parsed in a completely different input format. Some of -these objects may be useful for building more advanced parsing engines -such as GLR. - -

    -It should be stressed that using PLY at this level is not for the -faint of heart. Generally, it's assumed that you know a bit of -the underlying compiler theory and how an LR parser is put together. - -

    2. Grammar Class

    - - -The file ply.yacc defines a class Grammar that -is used to hold and manipulate information about a grammar -specification. It encapsulates the same basic information -about a grammar that is put into a YACC file including -the list of tokens, precedence rules, and grammar rules. -Various operations are provided to perform different validations -on the grammar. In addition, there are operations to compute -the first and follow sets that are needed by the various table -generation algorithms. - -

    -Grammar(terminals) - -

    -Creates a new grammar object. terminals is a list of strings -specifying the terminals for the grammar. An instance g of -Grammar has the following methods: -
    - -

    -g.set_precedence(term,assoc,level) -

    -Sets the precedence level and associativity for a given terminal term. -assoc is one of 'right', -'left', or 'nonassoc' and level is a positive integer. The higher -the value of level, the higher the precedence. Here is an example of typical -precedence settings: - -
    -g.set_precedence('PLUS',  'left',1)
    -g.set_precedence('MINUS', 'left',1)
    -g.set_precedence('TIMES', 'left',2)
    -g.set_precedence('DIVIDE','left',2)
    -g.set_precedence('UMINUS','left',3)
    -
    - -This method must be called prior to adding any productions to the -grammar with g.add_production(). The precedence of individual grammar -rules is determined by the precedence of the right-most terminal. - -
    -

    -g.add_production(name,syms,func=None,file='',line=0) -

    -Adds a new grammar rule. name is the name of the rule, -syms is a list of symbols making up the right hand -side of the rule, func is the function to call when -reducing the rule. file and line specify -the filename and line number of the rule and are used for -generating error messages. - -

    -The list of symbols in syms may include character -literals and %prec specifiers. Here are some -examples: - -

    -g.add_production('expr',['expr','PLUS','term'],func,file,line)
    -g.add_production('expr',['expr','"+"','term'],func,file,line)
    -g.add_production('expr',['MINUS','expr','%prec','UMINUS'],func,file,line)
    -
    - -

    -If any kind of error is detected, a GrammarError exception -is raised with a message indicating the reason for the failure. -

    - -
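-For instance, a malformed rule can be trapped like this (a small hypothetical sketch; the exact message depends on the error that was detected):
-
-from ply.yacc import GrammarError
-
-try:
-    g.add_production('expr', ['expr', '%prec'])   # nothing follows %prec
-except GrammarError as e:
-    print "bad production:", e
-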

    -g.set_start(start=None) -

    -Sets the starting rule for the grammar. start is a string -specifying the name of the start rule. If start is omitted, -the first grammar rule added with add_production() is taken to be -the starting rule. This method must always be called after all -productions have been added. -
    - -

    -g.find_unreachable() -

    -Diagnostic function. Returns a list of all unreachable non-terminals -defined in the grammar. This is used to identify inactive parts of -the grammar specification. -
    - -

    -g.infinite_cycle() -

    -Diagnostic function. Returns a list of all non-terminals in the -grammar that result in an infinite cycle. This condition occurs if -there is no way for a grammar rule to expand to a string containing -only terminal symbols. -
    - -

    -g.undefined_symbols() -

    -Diagnostic function. Returns a list of tuples (name, prod) -corresponding to undefined symbols in the grammar. name is the -name of the undefined symbol and prod is an instance of -Production which has information about the production rule -where the undefined symbol was used. -
    - -

    -g.unused_terminals() -

    -Diagnostic function. Returns a list of terminals that were defined, -but never used in the grammar. -
    - -

    -g.unused_rules() -

    -Diagnostic function. Returns a list of Production instances -corresponding to production rules that were defined in the grammar, -but never used anywhere. This is slightly different -than find_unreachable(). -
    - -

    -g.unused_precedence() -

-Diagnostic function. Returns a list of tuples (term, assoc) -corresponding to precedence rules that were set, but never used in the -grammar. term is the terminal name and assoc is the -precedence associativity (e.g., 'left', 'right', -or 'nonassoc'). -
    - -

    -g.compute_first() -

    -Compute all of the first sets for all symbols in the grammar. Returns a dictionary -mapping symbol names to a list of all first symbols. -
    - -

    -g.compute_follow() -

    -Compute all of the follow sets for all non-terminals in the grammar. -The follow set is the set of all possible symbols that might follow a -given non-terminal. Returns a dictionary mapping non-terminal names -to a list of symbols. -
    - -

    -g.build_lritems() -

    -Calculates all of the LR items for all productions in the grammar. This -step is required before using the grammar for any kind of table generation. -See the section on LR items below. -
    - -

    -The following attributes are set by the above methods and may be useful -in code that works with the grammar. All of these attributes should be -assumed to be read-only. Changing their values directly will likely -break the grammar. - -

    -g.Productions -

    -A list of all productions added. The first entry is reserved for -a production representing the starting rule. The objects in this list -are instances of the Production class, described shortly. -
    - -

    -g.Prodnames -

    -A dictionary mapping the names of nonterminals to a list of all -productions of that nonterminal. -
    - -

    -g.Terminals -

    -A dictionary mapping the names of terminals to a list of the -production numbers where they are used. -
    - -

    -g.Nonterminals -

    -A dictionary mapping the names of nonterminals to a list of the -production numbers where they are used. -
    - -

    -g.First -

    -A dictionary representing the first sets for all grammar symbols. This is -computed and returned by the compute_first() method. -
    - -

    -g.Follow -

    -A dictionary representing the follow sets for all grammar rules. This is -computed and returned by the compute_follow() method. -
    - -

    -g.Start -

    -Starting symbol for the grammar. Set by the set_start() method. -
    - -For the purposes of debugging, a Grammar object supports the __len__() and -__getitem__() special methods. Accessing g[n] returns the nth production -from the grammar. - - -

    3. Productions

    - - -Grammar objects store grammar rules as instances of a Production class. This -class has no public constructor--you should only create productions by calling Grammar.add_production(). -The following attributes are available on a Production instance p. - -

    -p.name -

    -The name of the production. For a grammar rule such as A : B C D, this is 'A'. -
    - -

    -p.prod -

    -A tuple of symbols making up the right-hand side of the production. For a grammar rule such as A : B C D, this is ('B','C','D'). -
    - -

    -p.number -

    -Production number. An integer containing the index of the production in the grammar's Productions list. -
    - -

    -p.func -

    -The name of the reduction function associated with the production. -This is the function that will execute when reducing the entire -grammar rule during parsing. -
    - -

    -p.callable -

    -The callable object associated with the name in p.func. This is None -unless the production has been bound using bind(). -
    - -

    -p.file -

    -Filename associated with the production. Typically this is the file where the production was defined. Used for error messages. -
    - -

    -p.lineno -

    -Line number associated with the production. Typically this is the line number in p.file where the production was defined. Used for error messages. -
    - -

    -p.prec -

    -Precedence and associativity associated with the production. This is a tuple (assoc,level) where -assoc is one of 'left','right', or 'nonassoc' and level is -an integer. This value is determined by the precedence of the right-most terminal symbol in the production -or by use of the %prec specifier when adding the production. -
    - -

    -p.usyms -

    -A list of all unique symbols found in the production. -
    - -

    -p.lr_items -

    -A list of all LR items for this production. This attribute only has a meaningful value if the -Grammar.build_lritems() method has been called. The items in this list are -instances of LRItem described below. -
    - -

    -p.lr_next -

    -The head of a linked-list representation of the LR items in p.lr_items. -This attribute only has a meaningful value if the Grammar.build_lritems() -method has been called. Each LRItem instance has a lr_next attribute -to move to the next item. The list is terminated by None. -
    - -

    -p.bind(dict) -

    -Binds the production function name in p.func to a callable object in -dict. This operation is typically carried out in the last step -prior to running the parsing engine and is needed since parsing tables are typically -read from files which only include the function names, not the functions themselves. -
    - -

    -Production objects support -the __len__(), __getitem__(), and __str__() -special methods. -len(p) returns the number of symbols in p.prod -and p[n] is the same as p.prod[n]. - -
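-Continuing the sketch above, the productions collected in g.Productions can be inspected like this:
-
-for p in g.Productions[1:]:               # entry 0 is the implicit start production
-    print p.number, p.name, '->', ' '.join(p.prod)
-    print '   precedence:', p.prec        # (assoc, level) tuple
-    print '   unique symbols:', p.usyms
-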

    4. LRItems

- - -The construction of parsing tables in an LR-based parser generator is primarily -done over a set of "LR Items". An LR item represents a stage of parsing one -of the grammar rules. To compute the LR items, it is first necessary to -call Grammar.build_lritems(). Once this step is complete, all of the productions -in the grammar will have their LR items attached to them. - -

-Here is an interactive session that shows what LR items look like when you -experiment. In this example, g is a Grammar -object. - -

    -
    ->>> g.build_lritems()
    ->>> p = g[1]
    ->>> p
    -Production(statement -> ID = expr)
    ->>>
    -
    -
    - -In the above code, p represents the first grammar rule. In -this case, a rule 'statement -> ID = expr'. - -

    -Now, let's look at the LR items for p. - -

    -
    ->>> p.lr_items
    -[LRItem(statement -> . ID = expr), 
    - LRItem(statement -> ID . = expr), 
    - LRItem(statement -> ID = . expr), 
    - LRItem(statement -> ID = expr .)]
    ->>>
    -
    -
-In each LR item, the dot (.) represents a specific stage of parsing. From one item to the next, the dot -is advanced by one symbol. It is only when the dot reaches the very end that a production -is successfully parsed. - -

    -An instance lr of LRItem has the following -attributes that hold information related to that specific stage of -parsing. - -

    -lr.name -

    -The name of the grammar rule. For example, 'statement' in the above example. -
    - -

    -lr.prod -

    -A tuple of symbols representing the right-hand side of the production, including the -special '.' character. For example, ('ID','.','=','expr'). -
    - -

    -lr.number -

    -An integer representing the production number in the grammar. -
    - -

    -lr.usyms -

    -A set of unique symbols in the production. Inherited from the original Production instance. -
    - -

    -lr.lr_index -

    -An integer representing the position of the dot (.). You should never use lr.prod.index() -to search for it--the result will be wrong if the grammar happens to also use (.) as a character -literal. -
    - -

    -lr.lr_after -

    -A list of all productions that can legally appear immediately to the right of the -dot (.). This list contains Production instances. This attribute -represents all of the possible branches a parse can take from the current position. -For example, suppose that lr represents a stage immediately before -an expression like this: - -
    ->>> lr
    -LRItem(statement -> ID = . expr)
    ->>>
    -
    - -Then, the value of lr.lr_after might look like this, showing all productions that -can legally appear next: - -
    ->>> lr.lr_after
    -[Production(expr -> expr PLUS expr), 
    - Production(expr -> expr MINUS expr), 
    - Production(expr -> expr TIMES expr), 
    - Production(expr -> expr DIVIDE expr), 
    - Production(expr -> MINUS expr), 
    - Production(expr -> LPAREN expr RPAREN), 
    - Production(expr -> NUMBER), 
    - Production(expr -> ID)]
    ->>>
    -
    - -
    - -

    -lr.lr_before -

    -The grammar symbol that appears immediately before the dot (.) or None if -at the beginning of the parse. -
    - -

    -lr.lr_next -

    -A link to the next LR item, representing the next stage of the parse. None if lr -is the last LR item. -
    - -LRItem instances also support the __len__() and __getitem__() special methods. -len(lr) returns the number of items in lr.prod including the dot (.). lr[n] -returns lr.prod[n]. - -

    -It goes without saying that all of the attributes associated with LR -items should be assumed to be read-only. Modifications will very -likely create a small black-hole that will consume you and your code. - -
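-As a small sketch, the linked-list form of the LR items can be walked like this (g is the Grammar object from the earlier examples):
-
-g.build_lritems()
-p = g.Productions[1]
-item = p.lr_next                          # head of the LR item chain for p
-while item is not None:
-    print item, '(dot at index %d)' % item.lr_index
-    item = item.lr_next                   # None terminates the chain
-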

    5. LRTable

    - - -The LRTable class is used to represent LR parsing table data. This -minimally includes the production list, action table, and goto table. - -

    -LRTable() -

    -Create an empty LRTable object. This object contains only the information needed to -run an LR parser. -
    - -An instance lrtab of LRTable has the following methods: - -

    -lrtab.read_table(module) -

    -Populates the LR table with information from the module specified in module. -module is either a module object already loaded with import or -the name of a Python module. If it's a string containing a module name, it is -loaded and parsing data is extracted. Returns the signature value that was used -when initially writing the tables. Raises a VersionError exception if -the module was created using an incompatible version of PLY. -
    - -

    -lrtab.bind_callables(dict) -

    -This binds all of the function names used in productions to callable objects -found in the dictionary dict. During table generation and when reading -LR tables from files, PLY only uses the names of action functions such as 'p_expr', -'p_statement', etc. In order to actually run the parser, these names -have to be bound to callable objects. This method is always called prior to -running a parser. -
    - -After lrtab has been populated, the following attributes are defined. - -

    -lrtab.lr_method -

    -The LR parsing method used (e.g., 'LALR') -
    - - -

    -lrtab.lr_productions -

    -The production list. If the parsing tables have been newly -constructed, this will be a list of Production instances. If -the parsing tables have been read from a file, it's a list -of MiniProduction instances. This, together -with lr_action and lr_goto contain all of the -information needed by the LR parsing engine. -
    - -

    -lrtab.lr_action -

    -The LR action dictionary that implements the underlying state machine. -The keys of this dictionary are the LR states. -
    - -

    -lrtab.lr_goto -

    -The LR goto table that contains information about grammar rule reductions. -
    - - -
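-A minimal sketch of reloading previously written tables; this assumes a parsetab module created earlier by write_table() and p_ functions defined in the calling module:
-
-from ply.yacc import LRTable
-
-lrtab = LRTable()
-sig = lrtab.read_table('parsetab')        # returns the stored signature
-lrtab.bind_callables(globals())           # map function names back to callables
-print lrtab.lr_method                     # e.g., 'LALR'
-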

    6. LRGeneratedTable

    - - -The LRGeneratedTable class represents constructed LR parsing tables on a -grammar. It is a subclass of LRTable. - -

    -LRGeneratedTable(grammar, method='LALR',log=None) -

    -Create the LR parsing tables on a grammar. grammar is an instance of Grammar, -method is a string with the parsing method ('SLR' or 'LALR'), and -log is a logger object used to write debugging information. The debugging information -written to log is the same as what appears in the parser.out file created -by yacc. By supplying a custom logger with a different message format, it is possible to get -more information (e.g., the line number in yacc.py used for issuing each line of -output in the log). The result is an instance of LRGeneratedTable. -
    - -

    -An instance lr of LRGeneratedTable has the following attributes. - -

    -lr.grammar -

    -A link to the Grammar object used to construct the parsing tables. -
    - -

    -lr.lr_method -

    -The LR parsing method used (e.g., 'LALR') -
    - - -

    -lr.lr_productions -

    -A reference to grammar.Productions. This, together with lr_action and lr_goto -contain all of the information needed by the LR parsing engine. -
    - -

    -lr.lr_action -

    -The LR action dictionary that implements the underlying state machine. The keys of this dictionary are -the LR states. -
    - -

    -lr.lr_goto -

    -The LR goto table that contains information about grammar rule reductions. -
    - -

    -lr.sr_conflicts -

    -A list of tuples (state,token,resolution) identifying all shift/reduce conflicts. state is the LR state -number where the conflict occurred, token is the token causing the conflict, and resolution is -a string describing the resolution taken. resolution is either 'shift' or 'reduce'. -
    - -

    -lr.rr_conflicts -

    -A list of tuples (state,rule,rejected) identifying all reduce/reduce conflicts. state is the -LR state number where the conflict occurred, rule is the production rule that was selected -and rejected is the production rule that was rejected. Both rule and rejected are -instances of Production. They can be inspected to provide the user with more information. -
    - -

    -There are two public methods of LRGeneratedTable. - -

    -lr.write_table(modulename,outputdir="",signature="") -

-Writes the LR parsing table information to a Python module. modulename is a string -specifying the name of a module such as "parsetab". outputdir is the name of a -directory where the module should be created. signature is a string representing a -grammar signature that's written into the output file. This can be used to detect when -the data stored in a module file is out-of-sync with the grammar specification (and that -the tables need to be regenerated). If modulename is a string "parsetab", -this function creates a file called parsetab.py. If the module name represents a -package such as "foo.bar.parsetab", then only the last component, "parsetab", is -used. -
    - - -
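-A brief sketch of generating and saving tables from the Grammar object g built in the earlier examples:
-
-from ply.yacc import LRGeneratedTable
-
-lr = LRGeneratedTable(g, method='LALR')
-for state, tok, resolution in lr.sr_conflicts:
-    print "shift/reduce conflict in state %d on %s (%s)" % (state, tok, resolution)
-lr.write_table('parsetab', outputdir='', signature='')
-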

    7. LRParser

    - - -The LRParser class implements the low-level LR parsing engine. - - -

    -LRParser(lrtab, error_func) -

    -Create an LRParser. lrtab is an instance of LRTable -containing the LR production and state tables. error_func is the -error function to invoke in the event of a parsing error. -
    - -An instance p of LRParser has the following methods: - -

    -p.parse(input=None,lexer=None,debug=0,tracking=0,tokenfunc=None) -

    -Run the parser. input is a string, which if supplied is fed into the -lexer using its input() method. lexer is an instance of the -Lexer class to use for tokenizing. If not supplied, the last lexer -created with the lex module is used. debug is a boolean flag -that enables debugging. tracking is a boolean flag that tells the -parser to perform additional line number tracking. tokenfunc is a callable -function that returns the next token. If supplied, the parser will use it to get -all tokens. -
    - -

    -p.restart() -

    -Resets the parser state for a parse already in progress. -
    - -
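-A minimal sketch of driving the engine directly; lrtab comes from LRTable or LRGeneratedTable above, and p_error and the usual t_ lexer rules are assumed to be defined elsewhere in the module:
-
-import ply.lex as lex
-from ply.yacc import LRParser
-
-lexer  = lex.lex()                        # built from the module's t_ rules
-parser = LRParser(lrtab, p_error)
-result = parser.parse(input="3 + 4 * 10", lexer=lexer)
-print result
-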

    8. ParserReflect

    - - -

    -The ParserReflect class is used to collect parser specification data -from a Python module or object. This class is what collects all of the -p_rule() functions in a PLY file, performs basic error checking, -and collects all of the needed information to build a grammar. Most of the -high-level PLY interface as used by the yacc() function is actually -implemented by this class. - -

    -ParserReflect(pdict, log=None) -

    -Creates a ParserReflect instance. pdict is a dictionary -containing parser specification data. This dictionary typically corresponds -to the module or class dictionary of code that implements a PLY parser. -log is a logger instance that will be used to report error -messages. -
    - -An instance p of ParserReflect has the following methods: - -

    -p.get_all() -

    -Collect and store all required parsing information. -
    - -

    -p.validate_all() -

-Validate all of the collected parsing information. This is a separate step -from p.get_all() as a performance optimization. To reduce -parser start-up time, a parser can elect to only validate the -parsing data when regenerating the parsing tables. The validation -step tries to collect as much information as possible rather than -raising an exception at the first sign of trouble. The attribute -p.error is set if there are any validation errors. The -value of this attribute is also returned. -
    - -

    -p.signature() -

    -Compute a signature representing the contents of the collected parsing -data. The signature value should change if anything in the parser -specification has changed in a way that would justify parser table -regeneration. This method can be called after p.get_all(), -but before p.validate_all(). -
    - -The following attributes are set in the process of collecting data: - -

    -p.start -

    -The grammar start symbol, if any. Taken from pdict['start']. -
    - -

    -p.error_func -

    -The error handling function or None. Taken from pdict['p_error']. -
    - -

    -p.tokens -

    -The token list. Taken from pdict['tokens']. -
    - -

    -p.prec -

    -The precedence specifier. Taken from pdict['precedence']. -
    - -

    -p.preclist -

-A parsed version of the precedence specifier. A list of tuples of the form -(token,assoc,level) where token is the terminal symbol, -assoc is the associativity (e.g., 'left') and level -is a numeric precedence level. -
    - -

    -p.grammar -

    -A list of tuples (name, rules) representing the grammar rules. name is the -name of a Python function or method in pdict that starts with "p_". -rules is a list of tuples (filename,line,prodname,syms) representing -the grammar rules found in the documentation string of that function. filename and line contain location -information that can be used for debugging. prodname is the name of the -production. syms is the right-hand side of the production. If you have a -function like this - -
    -def p_expr(p):
    -    '''expr : expr PLUS expr
    -            | expr MINUS expr
    -            | expr TIMES expr
    -            | expr DIVIDE expr'''
    -
    - -then the corresponding entry in p.grammar might look like this: - -
    -('p_expr', [ ('calc.py',10,'expr', ['expr','PLUS','expr']),
    -             ('calc.py',11,'expr', ['expr','MINUS','expr']),
    -             ('calc.py',12,'expr', ['expr','TIMES','expr']),
    -             ('calc.py',13,'expr', ['expr','DIVIDE','expr'])
    -           ])
    -
    -
    - -

    -p.pfuncs -

    -A sorted list of tuples (line, file, name, doc) representing all of -the p_ functions found. line and file give location -information. name is the name of the function. doc is the -documentation string. This list is sorted in ascending order by line number. -
    - -

    -p.files -

    -A dictionary holding all of the source filenames that were encountered -while collecting parser information. Only the keys of this dictionary have -any meaning. -
    - -

    -p.error -

-An attribute that indicates whether or not any critical errors -occurred in validation. If this is set, it means that some kind -of problem was detected and that no further processing should be -performed. -
    - - -
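-As a rough sketch, collecting a specification from a hypothetical module calcparse written in the normal PLY style:
-
-import calcparse
-from ply.yacc import ParserReflect
-
-pinfo = ParserReflect(calcparse.__dict__)
-pinfo.get_all()                           # collect start, tokens, precedence, and rules
-if pinfo.validate_all():                  # returns the value of pinfo.error
-    raise SystemExit("parser specification contains errors")
-print pinfo.signature()                   # changes whenever the specification changes
-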

    9. High-level operation

    - - -Using all of the above classes requires some attention to detail. The yacc() -function carries out a very specific sequence of operations to create a grammar. -This same sequence should be emulated if you build an alternative PLY interface. - -
      -
    1. A ParserReflect object is created and raw grammar specification data is -collected. -
    2. A Grammar object is created and populated with information -from the specification data. -
3. An LRGeneratedTable object is created to run the LALR algorithm over -the Grammar object. -
4. Productions in the LRGeneratedTable are bound to callables using the bind_callables() -method. -
5. An LRParser object is created from the information in the -LRGeneratedTable object. -
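-Putting the five steps together, a stripped-down emulation of yacc() might look roughly like this (a sketch only; calcparse is a hypothetical module containing an ordinary PLY parser specification):
-
-import calcparse
-from ply.yacc import ParserReflect, Grammar, LRGeneratedTable, LRParser
-
-pdict = calcparse.__dict__
-pinfo = ParserReflect(pdict)
-pinfo.get_all()                                   # step 1: collect raw specification data
-
-grammar = Grammar(pinfo.tokens)                   # step 2: build the Grammar object
-for term, assoc, level in pinfo.preclist:
-    grammar.set_precedence(term, assoc, level)
-for funcname, rules in pinfo.grammar:
-    for filename, line, prodname, syms in rules:
-        grammar.add_production(prodname, syms, funcname, filename, line)
-grammar.set_start(pinfo.start)
-grammar.build_lritems()
-
-lr = LRGeneratedTable(grammar, method='LALR')     # step 3: run the LALR algorithm
-lr.bind_callables(pdict)                          # step 4: bind rule names to callables
-parser = LRParser(lr, pdict.get('p_error'))       # step 5: create the parsing engine
-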
    - - - - - - - - - - diff --git a/ply/doc/makedoc.py b/ply/doc/makedoc.py deleted file mode 100644 index 415a53a..0000000 --- a/ply/doc/makedoc.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/local/bin/python - -############################################################################### -# Takes a chapter as input and adds internal links and numbering to all -# of the H1, H2, H3, H4 and H5 sections. -# -# Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link -# to. However, if the name is not an autogenerated name from a previous run, -# it will be kept. If it is autogenerated, it might change on subsequent runs -# of this program. Thus if you want to create links to one of the headings, -# then change the heading link name to something that does not look like an -# autogenerated link name. -############################################################################### - -import sys -import re -import string - -############################################################################### -# Functions -############################################################################### - -# Regexs for -alink = re.compile(r"", re.IGNORECASE) -heading = re.compile(r"(_nn\d)", re.IGNORECASE) - -def getheadingname(m): - autogeneratedheading = True; - if m.group(1) != None: - amatch = alink.match(m.group(1)) - if amatch: - # A non-autogenerated heading - keep it - headingname = amatch.group(1) - autogeneratedheading = heading.match(headingname) - if autogeneratedheading: - # The heading name was either non-existent or autogenerated, - # We can create a new heading / change the existing heading - headingname = "%s_nn%d" % (filenamebase, nameindex) - return headingname - -############################################################################### -# Main program -############################################################################### - -if len(sys.argv) != 2: - print "usage: makedoc.py filename" - sys.exit(1) - -filename = sys.argv[1] -filenamebase = string.split(filename,".")[0] - -section = 0 -subsection = 0 -subsubsection = 0 -subsubsubsection = 0 -nameindex = 0 - -name = "" - -# Regexs for

    ,...

    sections - -h1 = re.compile(r".*?

    ()*[\d\.\s]*(.*?)

    ", re.IGNORECASE) -h2 = re.compile(r".*?

    ()*[\d\.\s]*(.*?)

    ", re.IGNORECASE) -h3 = re.compile(r".*?

    ()*[\d\.\s]*(.*?)

    ", re.IGNORECASE) -h4 = re.compile(r".*?

    ()*[\d\.\s]*(.*?)

    ", re.IGNORECASE) -h5 = re.compile(r".*?
    ()*[\d\.\s]*(.*?)
    ", re.IGNORECASE) - -data = open(filename).read() # Read data -open(filename+".bak","w").write(data) # Make backup - -lines = data.splitlines() -result = [ ] # This is the result of postprocessing the file -index = "\n
    \n" # index contains the index for adding at the top of the file. Also printed to stdout. - -skip = 0 -skipspace = 0 - -for s in lines: - if s == "": - if not skip: - result.append("@INDEX@") - skip = 1 - else: - skip = 0 - continue; - if skip: - continue - - if not s and skipspace: - continue - - if skipspace: - result.append("") - result.append("") - skipspace = 0 - - m = h2.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - section += 1 - headingname = getheadingname(m) - result.append("""

    %d. %s

    """ % (headingname,section, prevheadingtext)) - - if subsubsubsection: - index += "\n" - if subsubsection: - index += "\n" - if subsection: - index += "\n" - if section == 1: - index += "
      \n" - - index += """
    • %s\n""" % (headingname,prevheadingtext) - subsection = 0 - subsubsection = 0 - subsubsubsection = 0 - skipspace = 1 - continue - m = h3.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsection += 1 - headingname = getheadingname(m) - result.append("""

      %d.%d %s

      """ % (headingname,section, subsection, prevheadingtext)) - - if subsubsubsection: - index += "
    \n" - if subsubsection: - index += "\n" - if subsection == 1: - index += "
      \n" - - index += """
    • %s\n""" % (headingname,prevheadingtext) - subsubsection = 0 - skipspace = 1 - continue - m = h4.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsection += 1 - subsubsubsection = 0 - headingname = getheadingname(m) - result.append("""

      %d.%d.%d %s

      """ % (headingname,section, subsection, subsubsection, prevheadingtext)) - - if subsubsubsection: - index += "
    \n" - if subsubsection == 1: - index += "
      \n" - - index += """
    • %s\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - m = h5.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsubsection += 1 - headingname = getheadingname(m) - result.append("""
      %d.%d.%d.%d %s
      """ % (headingname,section, subsection, subsubsection, subsubsubsection, prevheadingtext)) - - if subsubsubsection == 1: - index += "
        \n" - - index += """
      • %s\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - - result.append(s) - -if subsubsubsection: - index += "
      \n" - -if subsubsection: - index += "
    \n" - -if subsection: - index += "\n" - -if section: - index += "\n" - -index += "
    \n\n" - -data = "\n".join(result) - -data = data.replace("@INDEX@",index) + "\n"; - -# Write the file back out -open(filename,"w").write(data) - - diff --git a/ply/doc/ply.html b/ply/doc/ply.html deleted file mode 100644 index fdcd88a..0000000 --- a/ply/doc/ply.html +++ /dev/null @@ -1,3262 +0,0 @@ - - -PLY (Python Lex-Yacc) - - - -

    PLY (Python Lex-Yacc)

    - - -David M. Beazley
    -dave@dabeaz.com
    -
    - -

    -PLY Version: 3.4 -

    - - -

    - - - - -

    1. Preface and Requirements

    - - -

    -This document provides an overview of lexing and parsing with PLY. -Given the intrinsic complexity of parsing, I would strongly advise -that you read (or at least skim) this entire document before jumping -into a big development project with PLY. -

    - -

    -PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that -Python 3 support is new and has not been extensively tested (although -all of the examples and unit tests pass under Python 3.0). If you are -using Python 2, you should try to use Python 2.4 or newer. Although PLY -works with versions as far back as Python 2.2, some of its optional features -require more modern library modules. -

    - -

    2. Introduction

    - - -PLY is a pure-Python implementation of the popular compiler -construction tools lex and yacc. The main goal of PLY is to stay -fairly faithful to the way in which traditional lex/yacc tools work. -This includes supporting LALR(1) parsing as well as providing -extensive input validation, error reporting, and diagnostics. Thus, -if you've used yacc in another programming language, it should be -relatively straightforward to use PLY. - -

    -Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. Since 2001, PLY has -continued to be improved as feedback has been received from users. -PLY-3.0 represents a major refactoring of the original implementation -with an eye towards future enhancements. - -

    -Since PLY was primarily developed as an instructional tool, you will -find it to be fairly picky about token and grammar rule -specification. In part, this -added formality is meant to catch common programming mistakes made by -novice users. However, advanced users will also find such features to -be useful when building complicated grammars for real programming -languages. It should also be noted that PLY does not provide much in -the way of bells and whistles (e.g., automatic construction of -abstract syntax trees, tree traversal, etc.). Nor would I consider it -to be a parsing framework. Instead, you will find a bare-bones, yet -fully capable lex/yacc implementation written entirely in Python. - -

-The rest of this document assumes that you are somewhat familiar with -parsing theory, syntax directed translation, and the use of compiler -construction tools such as lex and yacc in other programming -languages. If you are unfamiliar with these topics, you will probably -want to consult an introductory text such as "Compilers: Principles, -Techniques, and Tools", by Aho, Sethi, and Ullman. O'Reilly's "Lex -and Yacc" by John Levine may also be handy. In fact, the O'Reilly book can be -used as a reference for PLY as the concepts are virtually identical. - -

    3. PLY Overview

- - -PLY consists of two separate modules: lex.py and -yacc.py, both of which are found in a Python package -called ply. The lex.py module is used to break input text into a -collection of tokens specified by a set of regular expression -rules. yacc.py is used to recognize language syntax that has -been specified in the form of a context free grammar. yacc.py uses LR parsing and generates its parsing tables -using either the LALR(1) (the default) or SLR table generation algorithms. - -

    -The two tools are meant to work together. Specifically, -lex.py provides an external interface in the form of a -token() function that returns the next valid token on the -input stream. yacc.py calls this repeatedly to retrieve -tokens and invoke grammar rules. The output of yacc.py is -often an Abstract Syntax Tree (AST). However, this is entirely up to -the user. If desired, yacc.py can also be used to implement -simple one-pass compilers. - -

    -Like its Unix counterpart, yacc.py provides most of the -features you expect including extensive error checking, grammar -validation, support for empty productions, error tokens, and ambiguity -resolution via precedence rules. In fact, everything that is possible in traditional yacc -should be supported in PLY. - -

    -The primary difference between -yacc.py and Unix yacc is that yacc.py -doesn't involve a separate code-generation process. -Instead, PLY relies on reflection (introspection) -to build its lexers and parsers. Unlike traditional lex/yacc which -require a special input file that is converted into a separate source -file, the specifications given to PLY are valid Python -programs. This means that there are no extra source files nor is -there a special compiler construction step (e.g., running yacc to -generate Python code for the compiler). Since the generation of the -parsing tables is relatively expensive, PLY caches the results and -saves them to a file. If no changes are detected in the input source, -the tables are read from the cache. Otherwise, they are regenerated. - -

    4. Lex

    - - -lex.py is used to tokenize an input string. For example, suppose -you're writing a programming language and a user supplied the following input string: - -
    -
    -x = 3 + 42 * (s - t)
    -
    -
    - -A tokenizer splits the string into individual tokens - -
    -
    -'x','=', '3', '+', '42', '*', '(', 's', '-', 't', ')'
    -
    -
    - -Tokens are usually given names to indicate what they are. For example: - -
    -
    -'ID','EQUALS','NUMBER','PLUS','NUMBER','TIMES',
    -'LPAREN','ID','MINUS','ID','RPAREN'
    -
    -
    - -More specifically, the input is broken into pairs of token types and values. For example: - -
    -
    -('ID','x'), ('EQUALS','='), ('NUMBER','3'), 
-('PLUS','+'), ('NUMBER','42'), ('TIMES','*'),
    -('LPAREN','('), ('ID','s'), ('MINUS','-'),
    -('ID','t'), ('RPAREN',')'
    -
    -
    - -The identification of tokens is typically done by writing a series of regular expression -rules. The next section shows how this is done using lex.py. - -

    4.1 Lex Example

    - - -The following example shows how lex.py is used to write a simple tokenizer. - -
    -
    -# ------------------------------------------------------------
    -# calclex.py
    -#
    -# tokenizer for a simple expression evaluator for
    -# numbers and +,-,*,/
    -# ------------------------------------------------------------
    -import ply.lex as lex
    -
    -# List of token names.   This is always required
    -tokens = (
    -   'NUMBER',
    -   'PLUS',
    -   'MINUS',
    -   'TIMES',
    -   'DIVIDE',
    -   'LPAREN',
    -   'RPAREN',
    -)
    -
    -# Regular expression rules for simple tokens
    -t_PLUS    = r'\+'
    -t_MINUS   = r'-'
    -t_TIMES   = r'\*'
    -t_DIVIDE  = r'/'
    -t_LPAREN  = r'\('
    -t_RPAREN  = r'\)'
    -
    -# A regular expression rule with some action code
    -def t_NUMBER(t):
    -    r'\d+'
    -    t.value = int(t.value)    
    -    return t
    -
    -# Define a rule so we can track line numbers
    -def t_newline(t):
    -    r'\n+'
    -    t.lexer.lineno += len(t.value)
    -
    -# A string containing ignored characters (spaces and tabs)
    -t_ignore  = ' \t'
    -
    -# Error handling rule
    -def t_error(t):
    -    print "Illegal character '%s'" % t.value[0]
    -    t.lexer.skip(1)
    -
    -# Build the lexer
    -lexer = lex.lex()
    -
    -
    -
    -To use the lexer, you first need to feed it some input text using -its input() method. After that, repeated calls -to token() produce tokens. The following code shows how this -works: - -
    -
    -
    -# Test it out
    -data = '''
    -3 + 4 * 10
    -  + -20 *2
    -'''
    -
    -# Give the lexer some input
    -lexer.input(data)
    -
    -# Tokenize
    -while True:
    -    tok = lexer.token()
    -    if not tok: break      # No more input
    -    print tok
    -
    -
    - -When executed, the example will produce the following output: - -
    -
    -$ python example.py
    -LexToken(NUMBER,3,2,1)
    -LexToken(PLUS,'+',2,3)
    -LexToken(NUMBER,4,2,5)
    -LexToken(TIMES,'*',2,7)
    -LexToken(NUMBER,10,2,10)
    -LexToken(PLUS,'+',3,14)
    -LexToken(MINUS,'-',3,16)
    -LexToken(NUMBER,20,3,18)
    -LexToken(TIMES,'*',3,20)
    -LexToken(NUMBER,2,3,21)
    -
    -
    - -Lexers also support the iteration protocol. So, you can write the above loop as follows: - -
    -
    -for tok in lexer:
    -    print tok
    -
    -
    - -The tokens returned by lexer.token() are instances -of LexToken. This object has -attributes tok.type, tok.value, -tok.lineno, and tok.lexpos. The following code shows an example of -accessing these attributes: - -
    -
    -# Tokenize
    -while True:
    -    tok = lexer.token()
    -    if not tok: break      # No more input
-    print tok.type, tok.value, tok.lineno, tok.lexpos
    -
    -
- -The tok.type and tok.value attributes contain the type and value of the token itself. tok.lineno and tok.lexpos contain information about the location of the token. tok.lexpos is the index of the token relative to the start of the input text. -

    4.2 The tokens list

    - - -All lexers must provide a list tokens that defines all of the possible token -names that can be produced by the lexer. This list is always required -and is used to perform a variety of validation checks. The tokens list is also used by the -yacc.py module to identify terminals. - -

    -In the example, the following code specified the token names: - -

    -
    -tokens = (
    -   'NUMBER',
    -   'PLUS',
    -   'MINUS',
    -   'TIMES',
    -   'DIVIDE',
    -   'LPAREN',
    -   'RPAREN',
    -)
    -
    -
    - -

    4.3 Specification of tokens

- - -Each token is specified by writing a regular expression rule. Each of these rules is defined by making a declaration with a special prefix t_ to indicate that it defines a token. For simple tokens, the regular expression can be specified as a string such as this (note: Python raw strings are used since they are the most convenient way to write regular expression strings): -
    -
    -t_PLUS = r'\+'
    -
    -
    - -In this case, the name following the t_ must exactly match one of the -names supplied in tokens. If some kind of action needs to be performed, -a token rule can be specified as a function. For example, this rule matches numbers and -converts the string into a Python integer. - -
    -
    -def t_NUMBER(t):
    -    r'\d+'
    -    t.value = int(t.value)
    -    return t
    -
    -
    - -When a function is used, the regular expression rule is specified in the function documentation string. -The function always takes a single argument which is an instance of -LexToken. This object has attributes of t.type which is the token type (as a string), -t.value which is the lexeme (the actual text matched), t.lineno which is the current line number, and t.lexpos which -is the position of the token relative to the beginning of the input text. -By default, t.type is set to the name following the t_ prefix. The action -function can modify the contents of the LexToken object as appropriate. However, -when it is done, the resulting token should be returned. If no value is returned by the action -function, the token is simply discarded and the next token read. - -

-Internally, lex.py uses the re module to do its pattern matching. When building the master regular expression, rules are added in the following order: -

    -

      -
    1. All tokens defined by functions are added in the same order as they appear in the lexer file. -
    2. Tokens defined by strings are added next by sorting them in order of decreasing regular expression length (longer expressions -are added first). -
    -

    -Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you -wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular -expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions, -the order can be explicitly controlled since rules appearing first are checked first. - -
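-As a small illustration of this ordering (the token names EQ and ASSIGN are purely illustrative and would have to appear in tokens), two string rules such as the following are safe because the longer pattern is automatically added first: -
-
-# '==' is tried before '=' because string rules are sorted by decreasing length
-t_EQ     = r'=='
-t_ASSIGN = r'='
-
-If the same tokens were written as functions instead, you would simply define t_EQ before t_ASSIGN in the file, since function rules are matched in the order in which they appear. -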

    -To handle reserved words, you should write a single rule to match an -identifier and do a special name lookup in a function like this: - -

    -
    -reserved = {
    -   'if' : 'IF',
    -   'then' : 'THEN',
    -   'else' : 'ELSE',
    -   'while' : 'WHILE',
    -   ...
    -}
    -
    -tokens = ['LPAREN','RPAREN',...,'ID'] + list(reserved.values())
    -
    -def t_ID(t):
    -    r'[a-zA-Z_][a-zA-Z_0-9]*'
    -    t.type = reserved.get(t.value,'ID')    # Check for reserved words
    -    return t
    -
    -
    - -This approach greatly reduces the number of regular expression rules and is likely to make things a little faster. - -

    -Note: You should avoid writing individual rules for reserved words. For example, if you write rules like this, - -

    -
    -t_FOR   = r'for'
    -t_PRINT = r'print'
    -
    -
    - -those rules will be triggered for identifiers that include those words as a prefix such as "forget" or "printed". This is probably not -what you want. - -

    4.4 Token values

    - - -When tokens are returned by lex, they have a value that is stored in the value attribute. Normally, the value is the text -that was matched. However, the value can be assigned to any Python object. For instance, when lexing identifiers, you may -want to return both the identifier name and information from some sort of symbol table. To do this, you might write a rule like this: - -
    -
    -def t_ID(t):
    -    ...
    -    # Look up symbol table information and return a tuple
    -    t.value = (t.value, symbol_lookup(t.value))
    -    ...
    -    return t
    -
    -
    - -It is important to note that storing data in other attribute names is not recommended. The yacc.py module only exposes the -contents of the value attribute. Thus, accessing other attributes may be unnecessarily awkward. If you -need to store multiple values on a token, assign a tuple, dictionary, or instance to value. - -

    4.5 Discarded tokens

    - - -To discard a token, such as a comment, simply define a token rule that returns no value. For example: - -
    -
    -def t_COMMENT(t):
    -    r'\#.*'
    -    pass
    -    # No return value. Token discarded
    -
    -
    - -Alternatively, you can include the prefix "ignore_" in the token declaration to force a token to be ignored. For example: - -
    -
    -t_ignore_COMMENT = r'\#.*'
    -
    -
    - -Be advised that if you are ignoring many different kinds of text, you may still want to use functions since these provide more precise -control over the order in which regular expressions are matched (i.e., functions are matched in order of specification whereas strings are -sorted by regular expression length). - -

    4.6 Line numbers and positional information

    - - -

    By default, lex.py knows nothing about line numbers. This is because lex.py doesn't know anything -about what constitutes a "line" of input (e.g., the newline character or even if the input is textual data). -To update this information, you need to write a special rule. In the example, the t_newline() rule shows how to do this. - -

    -
    -# Define a rule so we can track line numbers
    -def t_newline(t):
    -    r'\n+'
    -    t.lexer.lineno += len(t.value)
    -
    -
    -Within the rule, the lineno attribute of the underlying lexer t.lexer is updated. -After the line number is updated, the token is simply discarded since nothing is returned. - -

-lex.py does not perform any kind of automatic column tracking. However, it does record positional information related to each token in the lexpos attribute. Using this, it is usually possible to compute column information as a separate step. For instance, just count backwards until you reach a newline. -

    -
    -# Compute column. 
    -#     input is the input text string
    -#     token is a token instance
    -def find_column(input,token):
-    line_start = input.rfind('\n', 0, token.lexpos) + 1
-    column = (token.lexpos - line_start) + 1
    -    return column
    -
    -
    - -Since column information is often only useful in the context of error handling, calculating the column -position can be performed when needed as opposed to doing it for each token. - -
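-For example, here is a sketch of an error handling rule that also reports the column of an illegal character. It relies on the find_column() function above and on t.lexer.lexdata, which holds the input text (see the section on internal lexer state): -
-
-# Error handling rule that also reports the column
-def t_error(t):
-    column = find_column(t.lexer.lexdata, t)
-    print "Illegal character '%s' at line %d, column %d" % (t.value[0], t.lexer.lineno, column)
-    t.lexer.skip(1)
-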

    4.7 Ignored characters

    - - -

    -The special t_ignore rule is reserved by lex.py for characters -that should be completely ignored in the input stream. -Usually this is used to skip over whitespace and other non-essential characters. -Although it is possible to define a regular expression rule for whitespace in a manner -similar to t_newline(), the use of t_ignore provides substantially better -lexing performance because it is handled as a special case and is checked in a much -more efficient manner than the normal regular expression rules. - -

    4.8 Literal characters

    - - -

    -Literal characters can be specified by defining a variable literals in your lexing module. For example: - -

    -
    -literals = [ '+','-','*','/' ]
    -
    -
    - -or alternatively - -
    -
    -literals = "+-*/"
    -
    -
    - -A literal character is simply a single character that is returned "as is" when encountered by the lexer. Literals are checked -after all of the defined regular expression rules. Thus, if a rule starts with one of the literal characters, it will always -take precedence. -

    -When a literal token is returned, both its type and value attributes are set to the character itself. For example, '+'. - -
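-As a sketch of this precedence rule, suppose '+' is declared as a literal but you also want a two-character operator (the token name INCREMENT is purely illustrative and would have to be listed in tokens): -
-
-literals = ['+', '-']
-
-# Regular expression rules are checked before literals, so '++' is returned
-# as a single INCREMENT token instead of two '+' literals.
-def t_INCREMENT(t):
-    r'\+\+'
-    return t
-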

    4.9 Error handling

    - - -

    -Finally, the t_error() -function is used to handle lexing errors that occur when illegal -characters are detected. In this case, the t.value attribute contains the -rest of the input string that has not been tokenized. In the example, the error function -was defined as follows: - -

    -
    -# Error handling rule
    -def t_error(t):
    -    print "Illegal character '%s'" % t.value[0]
    -    t.lexer.skip(1)
    -
    -
    - -In this case, we simply print the offending character and skip ahead one character by calling t.lexer.skip(1). - -

    4.10 Building and using the lexer

    - - -

-To build the lexer, the function lex.lex() is used. This function uses Python reflection (or introspection) to read the regular expression rules out of the calling context and build the lexer. Once the lexer has been built, two methods can be used to control the lexer. -

      -
    • lexer.input(data). Reset the lexer and store a new input string. -
    • lexer.token(). Return the next token. Returns a special LexToken instance on success or -None if the end of the input text has been reached. -
    - -The preferred way to use PLY is to invoke the above methods directly on the lexer object returned by the -lex() function. The legacy interface to PLY involves module-level functions lex.input() and lex.token(). -For example: - -
    -
    -lex.lex()
    -lex.input(sometext)
    -while 1:
    -    tok = lex.token()
    -    if not tok: break
    -    print tok
    -
    -
    - -

    -In this example, the module-level functions lex.input() and lex.token() are bound to the input() -and token() methods of the last lexer created by the lex module. This interface may go away at some point so -it's probably best not to use it. - -
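-For comparison, the same loop written against the lexer object returned by lex() (the preferred style shown earlier) looks like this: -
-
-lexer = lex.lex()
-lexer.input(sometext)
-while True:
-    tok = lexer.token()
-    if not tok: break
-    print tok
-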

    4.11 The @TOKEN decorator

- - -In some applications, you may want to build tokens from a series of more complex regular expression rules. For example: -
    -
    -digit            = r'([0-9])'
    -nondigit         = r'([_A-Za-z])'
    -identifier       = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)'        
    -
    -def t_ID(t):
    -    # want docstring to be identifier above. ?????
    -    ...
    -
    -
    - -In this case, we want the regular expression rule for ID to be one of the variables above. However, there is no -way to directly specify this using a normal documentation string. To solve this problem, you can use the @TOKEN -decorator. For example: - -
    -
    -from ply.lex import TOKEN
    -
    -@TOKEN(identifier)
    -def t_ID(t):
    -    ...
    -
    -
- -This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. An alternative approach to this problem is to set the docstring directly like this: -
    -
    -def t_ID(t):
    -    ...
    -
    -t_ID.__doc__ = identifier
    -
    -
    - -NOTE: Use of @TOKEN requires Python-2.4 or newer. If you're concerned about backwards compatibility with older -versions of Python, use the alternative approach of setting the docstring directly. - -

    4.12 Optimized mode

    - - -For improved performance, it may be desirable to use Python's -optimized mode (e.g., running Python with the -O -option). However, doing so causes Python to ignore documentation -strings. This presents special problems for lex.py. To -handle this case, you can create your lexer using -the optimize option as follows: - -
    -
    -lexer = lex.lex(optimize=1)
    -
    -
    - -Next, run Python in its normal operating mode. When you do -this, lex.py will write a file called lextab.py to -the current directory. This file contains all of the regular -expression rules and tables used during lexing. On subsequent -executions, -lextab.py will simply be imported to build the lexer. This -approach substantially improves the startup time of the lexer and it -works in Python's optimized mode. - -

    -To change the name of the lexer-generated file, use the lextab keyword argument. For example: - -

    -
    -lexer = lex.lex(optimize=1,lextab="footab")
    -
    -
    - -When running in optimized mode, it is important to note that lex disables most error checking. Thus, this is really only recommended -if you're sure everything is working correctly and you're ready to start releasing production code. - -

    4.13 Debugging

    - - -For the purpose of debugging, you can run lex() in a debugging mode as follows: - -
    -
    -lexer = lex.lex(debug=1)
    -
    -
    - -

-This will produce various sorts of debugging information including all of the added rules, the master regular expressions used by the lexer, and tokens generated during lexing.

    - -

    -In addition, lex.py comes with a simple main function which -will either tokenize input read from standard input or from a file specified -on the command line. To use it, simply put this in your lexer: -

    - -
    -
    -if __name__ == '__main__':
    -     lex.runmain()
    -
    -
    - -Please refer to the "Debugging" section near the end for some more advanced details -of debugging. - -

    4.14 Alternative specification of lexers

    - - -As shown in the example, lexers are specified all within one Python module. If you want to -put token rules in a different module from the one in which you invoke lex(), use the -module keyword argument. - -

    -For example, you might have a dedicated module that just contains -the token rules: - -

    -
    -# module: tokrules.py
    -# This module just contains the lexing rules
    -
    -# List of token names.   This is always required
    -tokens = (
    -   'NUMBER',
    -   'PLUS',
    -   'MINUS',
    -   'TIMES',
    -   'DIVIDE',
    -   'LPAREN',
    -   'RPAREN',
    -)
    -
    -# Regular expression rules for simple tokens
    -t_PLUS    = r'\+'
    -t_MINUS   = r'-'
    -t_TIMES   = r'\*'
    -t_DIVIDE  = r'/'
    -t_LPAREN  = r'\('
    -t_RPAREN  = r'\)'
    -
    -# A regular expression rule with some action code
    -def t_NUMBER(t):
    -    r'\d+'
    -    t.value = int(t.value)    
    -    return t
    -
    -# Define a rule so we can track line numbers
    -def t_newline(t):
    -    r'\n+'
    -    t.lexer.lineno += len(t.value)
    -
    -# A string containing ignored characters (spaces and tabs)
    -t_ignore  = ' \t'
    -
    -# Error handling rule
    -def t_error(t):
    -    print "Illegal character '%s'" % t.value[0]
    -    t.lexer.skip(1)
    -
    -
    - -Now, if you wanted to build a tokenizer from these rules from within a different module, you would do the following (shown for Python interactive mode): - -
    -
    ->>> import tokrules
    ->>> lexer = lex.lex(module=tokrules)
    ->>> lexer.input("3 + 4")
    ->>> lexer.token()
-LexToken(NUMBER,3,1,0)
    ->>> lexer.token()
    -LexToken(PLUS,'+',1,2)
    ->>> lexer.token()
    -LexToken(NUMBER,4,1,4)
    ->>> lexer.token()
    -None
    ->>>
    -
    -
    - -The module option can also be used to define lexers from instances of a class. For example: - -
    -
    -import ply.lex as lex
    -
    -class MyLexer:
    -    # List of token names.   This is always required
    -    tokens = (
    -       'NUMBER',
    -       'PLUS',
    -       'MINUS',
    -       'TIMES',
    -       'DIVIDE',
    -       'LPAREN',
    -       'RPAREN',
    -    )
    -
    -    # Regular expression rules for simple tokens
    -    t_PLUS    = r'\+'
    -    t_MINUS   = r'-'
    -    t_TIMES   = r'\*'
    -    t_DIVIDE  = r'/'
    -    t_LPAREN  = r'\('
    -    t_RPAREN  = r'\)'
    -
    -    # A regular expression rule with some action code
    -    # Note addition of self parameter since we're in a class
    -    def t_NUMBER(self,t):
    -        r'\d+'
    -        t.value = int(t.value)    
    -        return t
    -
    -    # Define a rule so we can track line numbers
    -    def t_newline(self,t):
    -        r'\n+'
    -        t.lexer.lineno += len(t.value)
    -
    -    # A string containing ignored characters (spaces and tabs)
    -    t_ignore  = ' \t'
    -
    -    # Error handling rule
    -    def t_error(self,t):
    -        print "Illegal character '%s'" % t.value[0]
    -        t.lexer.skip(1)
    -
    -    # Build the lexer
    -    def build(self,**kwargs):
    -        self.lexer = lex.lex(module=self, **kwargs)
    -    
-    # Test it out
    -    def test(self,data):
    -        self.lexer.input(data)
    -        while True:
-             tok = self.lexer.token()
    -             if not tok: break
    -             print tok
    -
    -# Build the lexer and try it out
    -m = MyLexer()
    -m.build()           # Build the lexer
    -m.test("3 + 4")     # Test it
    -
    -
- - -When building a lexer from a class, you should construct the lexer from an instance of the class, not the class object itself. This is because PLY only works properly if the lexer actions are defined by bound methods. -

    -When using the module option to lex(), PLY collects symbols -from the underlying object using the dir() function. There is no -direct access to the __dict__ attribute of the object supplied as a -module value. - -

    -Finally, if you want to keep things nicely encapsulated, but don't want to use a -full-fledged class definition, lexers can be defined using closures. For example: - -

    -
    -import ply.lex as lex
    -
    -# List of token names.   This is always required
    -tokens = (
    -  'NUMBER',
    -  'PLUS',
    -  'MINUS',
    -  'TIMES',
    -  'DIVIDE',
    -  'LPAREN',
    -  'RPAREN',
    -)
    -
    -def MyLexer():
    -    # Regular expression rules for simple tokens
    -    t_PLUS    = r'\+'
    -    t_MINUS   = r'-'
    -    t_TIMES   = r'\*'
    -    t_DIVIDE  = r'/'
    -    t_LPAREN  = r'\('
    -    t_RPAREN  = r'\)'
    -
    -    # A regular expression rule with some action code
    -    def t_NUMBER(t):
    -        r'\d+'
    -        t.value = int(t.value)    
    -        return t
    -
    -    # Define a rule so we can track line numbers
    -    def t_newline(t):
    -        r'\n+'
    -        t.lexer.lineno += len(t.value)
    -
    -    # A string containing ignored characters (spaces and tabs)
    -    t_ignore  = ' \t'
    -
    -    # Error handling rule
    -    def t_error(t):
    -        print "Illegal character '%s'" % t.value[0]
    -        t.lexer.skip(1)
    -
    -    # Build the lexer from my environment and return it    
    -    return lex.lex()
    -
    -
    - - -

    4.15 Maintaining state

    - - -In your lexer, you may want to maintain a variety of state -information. This might include mode settings, symbol tables, and -other details. As an example, suppose that you wanted to keep -track of how many NUMBER tokens had been encountered. - -

    -One way to do this is to keep a set of global variables in the module -where you created the lexer. For example: - -

    -
    -num_count = 0
    -def t_NUMBER(t):
    -    r'\d+'
    -    global num_count
    -    num_count += 1
    -    t.value = int(t.value)    
    -    return t
    -
    -
- -If you don't like the use of a global variable, another place to store information is inside the Lexer object created by lex(). To do this, you can use the lexer attribute of tokens passed to the various rules. For example: -
    -
    -def t_NUMBER(t):
    -    r'\d+'
    -    t.lexer.num_count += 1     # Note use of lexer attribute
    -    t.value = int(t.value)    
    -    return t
    -
    -lexer = lex.lex()
    -lexer.num_count = 0            # Set the initial count
    -
    -
- -This latter approach has the advantage of being simple and working correctly in applications where multiple instantiations of a given lexer exist in the same application. However, this might also feel like a gross violation of encapsulation to OO purists. Just to put your mind at some ease, all internal attributes of the lexer (with the exception of lineno) have names that are prefixed by lex (e.g., lexdata, lexpos, etc.). Thus, it is perfectly safe to store attributes in the lexer that don't have names starting with that prefix or a name that conflicts with one of the predefined methods (e.g., input(), token(), etc.). -

    -If you don't like assigning values on the lexer object, you can define your lexer as a class as -shown in the previous section: - -

    -
    -class MyLexer:
    -    ...
    -    def t_NUMBER(self,t):
    -        r'\d+'
    -        self.num_count += 1
    -        t.value = int(t.value)    
    -        return t
    -
    -    def build(self, **kwargs):
    -        self.lexer = lex.lex(object=self,**kwargs)
    -
    -    def __init__(self):
    -        self.num_count = 0
    -
    -
    - -The class approach may be the easiest to manage if your application is -going to be creating multiple instances of the same lexer and you need -to manage a lot of state. - -

    -State can also be managed through closures. For example, in Python 3: - -

    -
    -def MyLexer():
    -    num_count = 0
    -    ...
    -    def t_NUMBER(t):
    -        r'\d+'
    -        nonlocal num_count
    -        num_count += 1
    -        t.value = int(t.value)    
    -        return t
    -    ...
    -
    -
    - -

    4.16 Lexer cloning

    - - -

    -If necessary, a lexer object can be duplicated by invoking its clone() method. For example: - -

    -
    -lexer = lex.lex()
    -...
    -newlexer = lexer.clone()
    -
    -
    - -When a lexer is cloned, the copy is exactly identical to the original lexer -including any input text and internal state. However, the clone allows a -different set of input text to be supplied which may be processed separately. -This may be useful in situations when you are writing a parser/compiler that -involves recursive or reentrant processing. For instance, if you -needed to scan ahead in the input for some reason, you could create a -clone and use it to look ahead. Or, if you were implementing some kind of preprocessor, -cloned lexers could be used to handle different input files. - -
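-As a small sketch of the look-ahead idea, a clone can be used to peek at the next token without disturbing the original lexer (the helper name peek_token is purely illustrative): -
-
-def peek_token(lexer):
-    # The clone starts with the same input text and position as the original,
-    # so advancing it does not move the original lexer.
-    lookahead = lexer.clone()
-    return lookahead.token()
-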

-Creating a clone is different from calling lex.lex() in that PLY doesn't regenerate any of the internal tables or regular expressions, so cloning is much cheaper than building a new lexer from scratch. -

    -Special considerations need to be made when cloning lexers that also -maintain their own internal state using classes or closures. Namely, -you need to be aware that the newly created lexers will share all of -this state with the original lexer. For example, if you defined a -lexer as a class and did this: - -

    -
    -m = MyLexer()
    -a = lex.lex(object=m)      # Create a lexer
    -
    -b = a.clone()              # Clone the lexer
    -
    -
    - -Then both a and b are going to be bound to the same -object m and any changes to m will be reflected in both lexers. It's -important to emphasize that clone() is only meant to create a new lexer -that reuses the regular expressions and environment of another lexer. If you -need to make a totally new copy of a lexer, then call lex() again. - -

    4.17 Internal lexer state

    - - -A Lexer object lexer has a number of internal attributes that may be useful in certain -situations. - -

    -lexer.lexpos -

    -This attribute is an integer that contains the current position within the input text. If you modify -the value, it will change the result of the next call to token(). Within token rule functions, this points -to the first character after the matched text. If the value is modified within a rule, the next returned token will be -matched at the new position. -
    - -

    -lexer.lineno -

    -The current value of the line number attribute stored in the lexer. PLY only specifies that the attribute -exists---it never sets, updates, or performs any processing with it. If you want to track line numbers, -you will need to add code yourself (see the section on line numbers and positional information). -
    - -

    -lexer.lexdata -

    -The current input text stored in the lexer. This is the string passed with the input() method. It -would probably be a bad idea to modify this unless you really know what you're doing. -
    - -

    -lexer.lexmatch -

    -This is the raw Match object returned by the Python re.match() function (used internally by PLY) for the -current token. If you have written a regular expression that contains named groups, you can use this to retrieve those values. -Note: This attribute is only updated when tokens are defined and processed by functions. -
    - -
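-For example, here is a sketch of a rule that pulls named groups out of lexer.lexmatch (the token name ASSIGNMENT and the group names are purely illustrative): -
-
-def t_ASSIGNMENT(t):
-    r'(?P<name>[a-zA-Z_][a-zA-Z_0-9]*)\s*=\s*(?P<rhs>\d+)'
-    m = t.lexer.lexmatch              # the underlying re match object
-    t.value = (m.group('name'), int(m.group('rhs')))
-    return t
-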

    4.18 Conditional lexing and start conditions

- - -In advanced parsing applications, it may be useful to have different lexing states. For instance, you may want the occurrence of a certain token or syntactic construct to trigger a different kind of lexing. PLY supports a feature that allows the underlying lexer to be put into a series of different states. Each state can have its own tokens, lexing rules, and so forth. The implementation is based largely on the "start condition" feature of GNU flex. Details of this can be found at http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html. -

    -To define a new lexing state, it must first be declared. This is done by including a "states" declaration in your -lex file. For example: - -

    -
    -states = (
    -   ('foo','exclusive'),
    -   ('bar','inclusive'),
    -)
    -
    -
- -This declaration declares two states, 'foo' and 'bar'. States may be of two types: 'exclusive' and 'inclusive'. An exclusive state completely overrides the default behavior of the lexer. That is, lex will only return tokens and apply rules defined specifically for that state. An inclusive state adds additional tokens and rules to the default set of rules. Thus, lex will return the tokens defined by default in addition to those defined for the inclusive state. -

    -Once a state has been declared, tokens and rules are declared by including the -state name in token/rule declaration. For example: - -

    -
    -t_foo_NUMBER = r'\d+'                      # Token 'NUMBER' in state 'foo'        
    -t_bar_ID     = r'[a-zA-Z_][a-zA-Z0-9_]*'   # Token 'ID' in state 'bar'
    -
    -def t_foo_newline(t):
    -    r'\n'
    -    t.lexer.lineno += 1
    -
    -
    - -A token can be declared in multiple states by including multiple state names in the declaration. For example: - -
    -
    -t_foo_bar_NUMBER = r'\d+'         # Defines token 'NUMBER' in both state 'foo' and 'bar'
    -
    -
- -Alternatively, a token can be declared in all states by using 'ANY' in the name. -
    -
    -t_ANY_NUMBER = r'\d+'         # Defines a token 'NUMBER' in all states
    -
    -
    - -If no state name is supplied, as is normally the case, the token is associated with a special state 'INITIAL'. For example, -these two declarations are identical: - -
    -
    -t_NUMBER = r'\d+'
    -t_INITIAL_NUMBER = r'\d+'
    -
    -
    - -

    -States are also associated with the special t_ignore and t_error() declarations. For example, if a state treats -these differently, you can declare: - -

    -
    -t_foo_ignore = " \t\n"       # Ignored characters for state 'foo'
    -
    -def t_bar_error(t):          # Special error handler for state 'bar'
    -    pass 
    -
    -
    - -By default, lexing operates in the 'INITIAL' state. This state includes all of the normally defined tokens. -For users who aren't using different states, this fact is completely transparent. If, during lexing or parsing, you want to change -the lexing state, use the begin() method. For example: - -
    -
    -def t_begin_foo(t):
    -    r'start_foo'
    -    t.lexer.begin('foo')             # Starts 'foo' state
    -
    -
    - -To get out of a state, you use begin() to switch back to the initial state. For example: - -
    -
    -def t_foo_end(t):
    -    r'end_foo'
    -    t.lexer.begin('INITIAL')        # Back to the initial state
    -
    -
    - -The management of states can also be done with a stack. For example: - -
    -
    -def t_begin_foo(t):
    -    r'start_foo'
    -    t.lexer.push_state('foo')             # Starts 'foo' state
    -
    -def t_foo_end(t):
    -    r'end_foo'
    -    t.lexer.pop_state()                   # Back to the previous state
    -
    -
    - -

    -The use of a stack would be useful in situations where there are many ways of entering a new lexing state and you merely want to go back -to the previous state afterwards. - -

    -An example might help clarify. Suppose you were writing a parser and you wanted to grab sections of arbitrary C code enclosed by -curly braces. That is, whenever you encounter a starting brace '{', you want to read all of the enclosed code up to the ending brace '}' -and return it as a string. Doing this with a normal regular expression rule is nearly (if not actually) impossible. This is because braces can -be nested and can be included in comments and strings. Thus, simply matching up to the first matching '}' character isn't good enough. Here is how -you might use lexer states to do this: - -

    -
    -# Declare the state
    -states = (
    -  ('ccode','exclusive'),
    -)
    -
    -# Match the first {. Enter ccode state.
    -def t_ccode(t):
    -    r'\{'
    -    t.lexer.code_start = t.lexer.lexpos        # Record the starting position
    -    t.lexer.level = 1                          # Initial brace level
    -    t.lexer.begin('ccode')                     # Enter 'ccode' state
    -
    -# Rules for the ccode state
    -def t_ccode_lbrace(t):     
    -    r'\{'
    -    t.lexer.level +=1                
    -
    -def t_ccode_rbrace(t):
    -    r'\}'
    -    t.lexer.level -=1
    -
    -    # If closing brace, return the code fragment
    -    if t.lexer.level == 0:
    -         t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos+1]
    -         t.type = "CCODE"
    -         t.lexer.lineno += t.value.count('\n')
    -         t.lexer.begin('INITIAL')           
    -         return t
    -
    -# C or C++ comment (ignore)    
    -def t_ccode_comment(t):
-    r'(/\*(.|\n)*?\*/)|(//.*)'
    -    pass
    -
    -# C string
    -def t_ccode_string(t):
    -   r'\"([^\\\n]|(\\.))*?\"'
    -
    -# C character literal
    -def t_ccode_char(t):
    -   r'\'([^\\\n]|(\\.))*?\''
    -
    -# Any sequence of non-whitespace characters (not braces, strings)
    -def t_ccode_nonspace(t):
    -   r'[^\s\{\}\'\"]+'
    -
    -# Ignored characters (whitespace)
    -t_ccode_ignore = " \t\n"
    -
    -# For bad characters, we just skip over it
    -def t_ccode_error(t):
    -    t.lexer.skip(1)
    -
    -
    - -In this example, the occurrence of the first '{' causes the lexer to record the starting position and enter a new state 'ccode'. A collection of rules then match -various parts of the input that follow (comments, strings, etc.). All of these rules merely discard the token (by not returning a value). -However, if the closing right brace is encountered, the rule t_ccode_rbrace collects all of the code (using the earlier recorded starting -position), stores it, and returns a token 'CCODE' containing all of that text. When returning the token, the lexing state is restored back to its -initial state. - -

    4.19 Miscellaneous Issues

    - - -

    -

  • The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this -rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data -such as open files or sockets. This limitation is primarily a side-effect of using the re module. - -

    -

• The lexer should work properly with Unicode strings, both when they are used in token and pattern matching rules and when they are supplied as input text. -

    -

  • If you need to supply optional flags to the re.compile() function, use the reflags option to lex. For example: - -
    -
    -lex.lex(reflags=re.UNICODE)
    -
    -
    - -

    -

• Since the lexer is written entirely in Python, its performance is largely determined by that of the Python re module. Although the lexer has been written to be as efficient as possible, it's not blazingly fast when used on very large input files. If performance is a concern, you might consider upgrading to the most recent version of Python, creating a hand-written lexer, or offloading the lexer into a C extension module. -

-If you are going to create a hand-written lexer and you plan to use it with yacc.py, it only needs to conform to the following requirements (a minimal sketch appears after the list): -

      -
    • It must provide a token() method that returns the next token or None if no more -tokens are available. -
    • The token() method must return an object tok that has type and value attributes. If -line number tracking is being used, then the token should also define a lineno attribute. -
    - -
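-For example, here is a minimal sketch of a hand-written lexer that meets these requirements. The token class, the token name WORD, and the whitespace-splitting strategy are purely illustrative; an input() method is included only for convenience: -
-
-class SimpleToken(object):
-    def __init__(self, type, value, lineno):
-        self.type   = type
-        self.value  = value
-        self.lineno = lineno
-
-class HandLexer(object):
-    def __init__(self):
-        self.words = iter([])
-    def input(self, data):
-        # Trivially split the input into whitespace-separated words
-        self.words = iter(data.split())
-    def token(self):
-        try:
-            return SimpleToken('WORD', next(self.words), 1)
-        except StopIteration:
-            return None               # No more tokens
-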

    5. Parsing basics

    - - -yacc.py is used to parse language syntax. Before showing an -example, there are a few important bits of background that must be -mentioned. First, syntax is usually specified in terms of a BNF grammar. -For example, if you wanted to parse -simple arithmetic expressions, you might first write an unambiguous -grammar specification like this: - -
    -
     
    -expression : expression + term
    -           | expression - term
    -           | term
    -
    -term       : term * factor
    -           | term / factor
    -           | factor
    -
    -factor     : NUMBER
    -           | ( expression )
    -
    -
    - -In the grammar, symbols such as NUMBER, +, -, *, and / are known -as terminals and correspond to raw input tokens. Identifiers such as term and factor refer to -grammar rules comprised of a collection of terminals and other rules. These identifiers are known as non-terminals. -

    - -The semantic behavior of a language is often specified using a -technique known as syntax directed translation. In syntax directed -translation, attributes are attached to each symbol in a given grammar -rule along with an action. Whenever a particular grammar rule is -recognized, the action describes what to do. For example, given the -expression grammar above, you might write the specification for a -simple calculator like this: - -

    -
     
    -Grammar                             Action
    ---------------------------------    -------------------------------------------- 
    -expression0 : expression1 + term    expression0.val = expression1.val + term.val
    -            | expression1 - term    expression0.val = expression1.val - term.val
    -            | term                  expression0.val = term.val
    -
    -term0       : term1 * factor        term0.val = term1.val * factor.val
    -            | term1 / factor        term0.val = term1.val / factor.val
    -            | factor                term0.val = factor.val
    -
    -factor      : NUMBER                factor.val = int(NUMBER.lexval)
    -            | ( expression )        factor.val = expression.val
    -
    -
    - -A good way to think about syntax directed translation is to -view each symbol in the grammar as a kind of object. Associated -with each symbol is a value representing its "state" (for example, the -val attribute above). Semantic -actions are then expressed as a collection of functions or methods -that operate on the symbols and associated values. - -

    -Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a -bottom up technique that tries to recognize the right-hand-side of various grammar rules. -Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the -grammar symbols are replaced by the grammar symbol on the left-hand-side. - -

    -LR parsing is commonly implemented by shifting grammar symbols onto a -stack and looking at the stack and the next input token for patterns that -match one of the grammar rules. -The details of the algorithm can be found in a compiler textbook, but the -following example illustrates the steps that are performed if you -wanted to parse the expression -3 + 5 * (10 - 20) using the grammar defined above. In the example, -the special symbol $ represents the end of input. - - -

    -
    -Step Symbol Stack           Input Tokens            Action
    ----- ---------------------  ---------------------   -------------------------------
    -1                           3 + 5 * ( 10 - 20 )$    Shift 3
    -2    3                        + 5 * ( 10 - 20 )$    Reduce factor : NUMBER
    -3    factor                   + 5 * ( 10 - 20 )$    Reduce term   : factor
    -4    term                     + 5 * ( 10 - 20 )$    Reduce expr : term
    -5    expr                     + 5 * ( 10 - 20 )$    Shift +
    -6    expr +                     5 * ( 10 - 20 )$    Shift 5
    -7    expr + 5                     * ( 10 - 20 )$    Reduce factor : NUMBER
    -8    expr + factor                * ( 10 - 20 )$    Reduce term   : factor
    -9    expr + term                  * ( 10 - 20 )$    Shift *
    -10   expr + term *                  ( 10 - 20 )$    Shift (
    -11   expr + term * (                  10 - 20 )$    Shift 10
    -12   expr + term * ( 10                  - 20 )$    Reduce factor : NUMBER
    -13   expr + term * ( factor              - 20 )$    Reduce term : factor
    -14   expr + term * ( term                - 20 )$    Reduce expr : term
    -15   expr + term * ( expr                - 20 )$    Shift -
    -16   expr + term * ( expr -                20 )$    Shift 20
    -17   expr + term * ( expr - 20                )$    Reduce factor : NUMBER
    -18   expr + term * ( expr - factor            )$    Reduce term : factor
    -19   expr + term * ( expr - term              )$    Reduce expr : expr - term
    -20   expr + term * ( expr                     )$    Shift )
    -21   expr + term * ( expr )                    $    Reduce factor : (expr)
    -22   expr + term * factor                      $    Reduce term : term * factor
    -23   expr + term                               $    Reduce expr : expr + term
    -24   expr                                      $    Reduce expr
    -25                                             $    Success!
    -
    -
    - -When parsing the expression, an underlying state machine and the -current input token determine what happens next. If the next token -looks like part of a valid grammar rule (based on other items on the -stack), it is generally shifted onto the stack. If the top of the -stack contains a valid right-hand-side of a grammar rule, it is -usually "reduced" and the symbols replaced with the symbol on the -left-hand-side. When this reduction occurs, the appropriate action is -triggered (if defined). If the input token can't be shifted and the -top of stack doesn't match any grammar rules, a syntax error has -occurred and the parser must take some kind of recovery step (or bail -out). A parse is only successful if the parser reaches a state where -the symbol stack is empty and there are no more input tokens. - -

    -It is important to note that the underlying implementation is built -around a large finite-state machine that is encoded in a collection of -tables. The construction of these tables is non-trivial and -beyond the scope of this discussion. However, subtle details of this -process explain why, in the example above, the parser chooses to shift -a token onto the stack in step 9 rather than reducing the -rule expr : expr + term. - -

    6. Yacc

    - - -The ply.yacc module implements the parsing component of PLY. -The name "yacc" stands for "Yet Another Compiler Compiler" and is -borrowed from the Unix tool of the same name. - -

    6.1 An example

    - - -Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is -how you would do it with yacc.py: - -
    -
    -# Yacc example
    -
    -import ply.yacc as yacc
    -
    -# Get the token map from the lexer.  This is required.
    -from calclex import tokens
    -
    -def p_expression_plus(p):
    -    'expression : expression PLUS term'
    -    p[0] = p[1] + p[3]
    -
    -def p_expression_minus(p):
    -    'expression : expression MINUS term'
    -    p[0] = p[1] - p[3]
    -
    -def p_expression_term(p):
    -    'expression : term'
    -    p[0] = p[1]
    -
    -def p_term_times(p):
    -    'term : term TIMES factor'
    -    p[0] = p[1] * p[3]
    -
    -def p_term_div(p):
    -    'term : term DIVIDE factor'
    -    p[0] = p[1] / p[3]
    -
    -def p_term_factor(p):
    -    'term : factor'
    -    p[0] = p[1]
    -
    -def p_factor_num(p):
    -    'factor : NUMBER'
    -    p[0] = p[1]
    -
    -def p_factor_expr(p):
    -    'factor : LPAREN expression RPAREN'
    -    p[0] = p[2]
    -
    -# Error rule for syntax errors
    -def p_error(p):
    -    print "Syntax error in input!"
    -
    -# Build the parser
    -parser = yacc.yacc()
    -
    -while True:
    -   try:
    -       s = raw_input('calc > ')
    -   except EOFError:
    -       break
    -   if not s: continue
    -   result = parser.parse(s)
    -   print result
    -
    -
    - -In this example, each grammar rule is defined by a Python function -where the docstring to that function contains the appropriate -context-free grammar specification. The statements that make up the -function body implement the semantic actions of the rule. Each function -accepts a single argument p that is a sequence containing the -values of each grammar symbol in the corresponding rule. The values -of p[i] are mapped to grammar symbols as shown here: - -
    -
    -def p_expression_plus(p):
    -    'expression : expression PLUS term'
    -    #   ^            ^        ^    ^
    -    #  p[0]         p[1]     p[2] p[3]
    -
    -    p[0] = p[1] + p[3]
    -
    -
    - -

-For tokens, the "value" of the corresponding p[i] is the same as the p.value attribute assigned in the lexer module. For non-terminals, the value is determined by whatever is placed in p[0] when rules are reduced. This value can be anything at all. However, it is probably most common for the value to be a simple Python type, a tuple, or an instance. In this example, we are relying on the fact that the NUMBER token stores an integer value in its value field. All of the other rules simply perform various types of integer operations and propagate the result. -
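-For example, instead of computing a result immediately, a rule could propagate a simple tuple-based parse tree node (a sketch, not part of the calculator example): -
-
-def p_expression_plus(p):
-    'expression : expression PLUS term'
-    # Build a tree node instead of evaluating the expression right away
-    p[0] = ('+', p[1], p[3])
-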

    - -

-Note: The use of negative indices has a special meaning in yacc---specifically, p[-1] does not have the same value as p[3] in this example. Please see the section on "Embedded Actions" for further details.

    - -

    -The first rule defined in the yacc specification determines the -starting grammar symbol (in this case, a rule for expression -appears first). Whenever the starting rule is reduced by the parser -and no more input is available, parsing stops and the final value is -returned (this value will be whatever the top-most rule placed -in p[0]). Note: an alternative starting symbol can be -specified using the start keyword argument to -yacc(). - -

    The p_error(p) rule is defined to catch syntax errors. -See the error handling section below for more detail. - -

    -To build the parser, call the yacc.yacc() function. This -function looks at the module and attempts to construct all of the LR -parsing tables for the grammar you have specified. The first -time yacc.yacc() is invoked, you will get a message such as -this: - -

    -
    -$ python calcparse.py
    -Generating LALR tables
    -calc > 
    -
    -
    - -Since table construction is relatively expensive (especially for large -grammars), the resulting parsing table is written to the current -directory in a file called parsetab.py. In addition, a -debugging file called parser.out is created. On subsequent -executions, yacc will reload the table from -parsetab.py unless it has detected a change in the underlying -grammar (in which case the tables and parsetab.py file are -regenerated). Note: The names of parser output files can be changed -if necessary. See the PLY Reference for details. - -

    -If any errors are detected in your grammar specification, yacc.py will produce -diagnostic messages and possibly raise an exception. Some of the errors that can be detected include: - -

      -
• Duplicated function names (if more than one rule function has the same name in the grammar file). -
    • Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars. -
    • Badly specified grammar rules. -
    • Infinite recursion (rules that can never terminate). -
    • Unused rules and tokens -
    • Undefined rules and tokens -
    - -The next few sections discuss grammar specification in more detail. - -

- -The final part of the example shows how to actually run the parser created by yacc(). To run the parser, you simply have to call the parse() method with a string of input text. This will run all of the grammar rules and return the result of the entire parse. The returned result is the value assigned to p[0] in the starting grammar rule. -

    6.2 Combining Grammar Rule Functions

    - - -When grammar rules are similar, they can be combined into a single function. -For example, consider the two rules in our earlier example: - -
    -
    -def p_expression_plus(p):
    -    'expression : expression PLUS term'
    -    p[0] = p[1] + p[3]
    -
-def p_expression_minus(p):
    -    'expression : expression MINUS term'
    -    p[0] = p[1] - p[3]
    -
    -
    - -Instead of writing two functions, you might write a single function like this: - -
    -
    -def p_expression(p):
    -    '''expression : expression PLUS term
    -                  | expression MINUS term'''
    -    if p[2] == '+':
    -        p[0] = p[1] + p[3]
    -    elif p[2] == '-':
    -        p[0] = p[1] - p[3]
    -
    -
    - -In general, the doc string for any given function can contain multiple grammar rules. So, it would -have also been legal (although possibly confusing) to write this: - -
    -
    -def p_binary_operators(p):
    -    '''expression : expression PLUS term
    -                  | expression MINUS term
    -       term       : term TIMES factor
    -                  | term DIVIDE factor'''
    -    if p[2] == '+':
    -        p[0] = p[1] + p[3]
    -    elif p[2] == '-':
    -        p[0] = p[1] - p[3]
    -    elif p[2] == '*':
    -        p[0] = p[1] * p[3]
    -    elif p[2] == '/':
    -        p[0] = p[1] / p[3]
    -
    -
    - -When combining grammar rules into a single function, it is usually a good idea for all of the rules to have -a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more -complicated than necessary. However, it is possible to handle simple cases using len(). For example: - -
    -
    -def p_expressions(p):
    -    '''expression : expression MINUS expression
    -                  | MINUS expression'''
    -    if (len(p) == 4):
    -        p[0] = p[1] - p[3]
    -    elif (len(p) == 3):
    -        p[0] = -p[2]
    -
    -
    - -If parsing performance is a concern, you should resist the urge to put -too much conditional processing into a single grammar rule as shown in -these examples. When you add checks to see which grammar rule is -being handled, you are actually duplicating the work that the parser -has already performed (i.e., the parser already knows exactly what rule it -matched). You can eliminate this overhead by using a -separate p_rule() function for each grammar rule. - -

    6.3 Character Literals

    - - -If desired, a grammar may contain tokens defined as single character literals. For example: - -
    -
    -def p_binary_operators(p):
    -    '''expression : expression '+' term
    -                  | expression '-' term
    -       term       : term '*' factor
    -                  | term '/' factor'''
    -    if p[2] == '+':
    -        p[0] = p[1] + p[3]
    -    elif p[2] == '-':
    -        p[0] = p[1] - p[3]
    -    elif p[2] == '*':
    -        p[0] = p[1] * p[3]
    -    elif p[2] == '/':
    -        p[0] = p[1] / p[3]
    -
    -
    - -A character literal must be enclosed in quotes such as '+'. In addition, if literals are used, they must be declared in the -corresponding lex file through the use of a special literals declaration. - -
    -
    -# Literals.  Should be placed in module given to lex()
    -literals = ['+','-','*','/' ]
    -
    -
    - -Character literals are limited to a single character. Thus, it is not legal to specify literals such as '<=' or '=='. For this, use -the normal lexing rules (e.g., define a rule such as t_EQ = r'=='). - -

    6.4 Empty Productions

    - - -yacc.py can handle empty productions by defining a rule like this: - -
    -
    -def p_empty(p):
    -    'empty :'
    -    pass
    -
    -
    - -Now to use the empty production, simply use 'empty' as a symbol. For example: - -
    -
    -def p_optitem(p):
-    '''optitem : item
-               | empty'''
    -    ...
    -
    -
    - -Note: You can write empty rules anywhere by simply specifying an empty -right hand side. However, I personally find that writing an "empty" -rule and using "empty" to denote an empty production is easier to read -and more clearly states your intentions. - -

    6.5 Changing the starting symbol

    - - -Normally, the first rule found in a yacc specification defines the starting grammar rule (top level rule). To change this, simply -supply a start specifier in your file. For example: - -
    -
    -start = 'foo'
    -
    -def p_bar(p):
    -    'bar : A B'
    -
    -# This is the starting rule due to the start specifier above
    -def p_foo(p):
    -    'foo : bar X'
    -...
    -
    -
    - -The use of a start specifier may be useful during debugging -since you can use it to have yacc build a subset of a larger grammar. -For this purpose, it is also possible to specify a starting symbol as -an argument to yacc(). For example: - -
    -
    -yacc.yacc(start='foo')
    -
    -
    - -

    6.6 Dealing With Ambiguous Grammars

    - - -The expression grammar given in the earlier example has been written -in a special format to eliminate ambiguity. However, in many -situations, it is extremely difficult or awkward to write grammars in -this format. A much more natural way to express the grammar is in a -more compact form like this: - -
    -
    -expression : expression PLUS expression
    -           | expression MINUS expression
    -           | expression TIMES expression
    -           | expression DIVIDE expression
    -           | LPAREN expression RPAREN
    -           | NUMBER
    -
    -
    - -Unfortunately, this grammar specification is ambiguous. For example, -if you are parsing the string "3 * 4 + 5", there is no way to tell how -the operators are supposed to be grouped. For example, does the -expression mean "(3 * 4) + 5" or is it "3 * (4+5)"? - -

    -When an ambiguous grammar is given to yacc.py it will print -messages about "shift/reduce conflicts" or "reduce/reduce conflicts". -A shift/reduce conflict is caused when the parser generator can't -decide whether or not to reduce a rule or shift a symbol on the -parsing stack. For example, consider the string "3 * 4 + 5" and the -internal parsing stack: - -

    -
    -Step Symbol Stack           Input Tokens            Action
    ----- ---------------------  ---------------------   -------------------------------
    -1    $                                3 * 4 + 5$    Shift 3
-2    $ 3                                * 4 + 5$    Reduce expression : NUMBER
    -3    $ expr                             * 4 + 5$    Shift *
    -4    $ expr *                             4 + 5$    Shift 4
-5    $ expr * 4                             + 5$    Reduce expression : NUMBER
    -6    $ expr * expr                          + 5$    SHIFT/REDUCE CONFLICT ????
    -
    -
    - -In this case, when the parser reaches step 6, it has two options. One -is to reduce the rule expr : expr * expr on the stack. The -other option is to shift the token + on the stack. Both -options are perfectly legal from the rules of the -context-free-grammar. - -

    -By default, all shift/reduce conflicts are resolved in favor of -shifting. Therefore, in the above example, the parser will always -shift the + instead of reducing. Although this strategy -works in many cases (for example, the case of -"if-then" versus "if-then-else"), it is not enough for arithmetic expressions. In fact, -in the above example, the decision to shift + is completely -wrong---we should have reduced expr * expr since -multiplication has higher mathematical precedence than addition. - -
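-The "if-then" case mentioned above is the classic dangling-else problem. A sketch of the ambiguous rules (the token and rule names are illustrative): -
-
-statement : IF expression THEN statement
-          | IF expression THEN statement ELSE statement
-
-With the default resolution, the parser shifts the ELSE token, which attaches it to the nearest unmatched IF---usually the behavior you want. -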

    To resolve ambiguity, especially in expression -grammars, yacc.py allows individual tokens to be assigned a -precedence level and associativity. This is done by adding a variable -precedence to the grammar file like this: - -

    -
    -precedence = (
    -    ('left', 'PLUS', 'MINUS'),
    -    ('left', 'TIMES', 'DIVIDE'),
    -)
    -
    -
    - -This declaration specifies that PLUS/MINUS have the -same precedence level and are left-associative and that -TIMES/DIVIDE have the same precedence and are -left-associative. Within the precedence declaration, tokens -are ordered from lowest to highest precedence. Thus, this declaration -specifies that TIMES/DIVIDE have higher precedence -than PLUS/MINUS (since they appear later in the -precedence specification). - -

    -The precedence specification works by associating a numerical -precedence level value and associativity direction to the listed -tokens. For example, in the above example you get: - -

    -
    -PLUS      : level = 1,  assoc = 'left'
    -MINUS     : level = 1,  assoc = 'left'
    -TIMES     : level = 2,  assoc = 'left'
    -DIVIDE    : level = 2,  assoc = 'left'
    -
    -
    - -These values are then used to attach a numerical precedence value and -associativity direction to each grammar rule. This is always -determined by looking at the precedence of the right-most terminal -symbol. For example: - -
    -
    -expression : expression PLUS expression                 # level = 1, left
    -           | expression MINUS expression                # level = 1, left
    -           | expression TIMES expression                # level = 2, left
    -           | expression DIVIDE expression               # level = 2, left
    -           | LPAREN expression RPAREN                   # level = None (not specified)
    -           | NUMBER                                     # level = None (not specified)
    -
    -
    - -When shift/reduce conflicts are encountered, the parser generator resolves the conflict by -looking at the precedence rules and associativity specifiers. - -

    -

      -
    1. If the current token has higher precedence than the rule on the stack, it is shifted. -
    2. If the grammar rule on the stack has higher precedence, the rule is reduced. -
    3. If the current token and the grammar rule have the same precedence, the -rule is reduced for left associativity, whereas the token is shifted for right associativity. -
    4. If nothing is known about the precedence, shift/reduce conflicts are resolved in -favor of shifting (the default). -
    - -For example, if "expression PLUS expression" has been parsed and the -next token is "TIMES", the action is going to be a shift because -"TIMES" has a higher precedence level than "PLUS". On the other hand, -if "expression TIMES expression" has been parsed and the next token is -"PLUS", the action is going to be reduce because "PLUS" has a lower -precedence than "TIMES." - -

    -When shift/reduce conflicts are resolved using the first three -techniques (with the help of precedence rules), yacc.py will -report no errors or conflicts in the grammar (although it will print -some information in the parser.out debugging file). - -

    -One problem with the precedence specifier technique is that it is -sometimes necessary to change the precedence of an operator in certain -contexts. For example, consider a unary-minus operator in "3 + 4 * --5". Mathematically, the unary minus is normally given a very high -precedence--being evaluated before the multiply. However, in our -precedence specifier, MINUS has a lower precedence than TIMES. To -deal with this, precedence rules can be given for so-called "fictitious tokens" -like this: - -

    -
    -precedence = (
    -    ('left', 'PLUS', 'MINUS'),
    -    ('left', 'TIMES', 'DIVIDE'),
    -    ('right', 'UMINUS'),            # Unary minus operator
    -)
    -
    -
    - -Now, in the grammar file, we can write our unary minus rule like this: - -
    -
    -def p_expr_uminus(p):
    -    'expression : MINUS expression %prec UMINUS'
    -    p[0] = -p[2]
    -
    -
    - -In this case, %prec UMINUS overrides the default rule precedence--setting it to that -of UMINUS in the precedence specifier. - -

-At first, the use of UMINUS in this example may appear very confusing. UMINUS is not an input token or a grammar rule. Instead, you should think of it as the name of a special marker in the precedence table. When you use the %prec qualifier, you're simply telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence. -

    -It is also possible to specify non-associativity in the precedence table. This would -be used when you don't want operations to chain together. For example, suppose -you wanted to support comparison operators like < and > but you didn't want to allow -combinations like a < b < c. To do this, simply specify a rule like this: - -

    -
    -precedence = (
    -    ('nonassoc', 'LESSTHAN', 'GREATERTHAN'),  # Nonassociative operators
    -    ('left', 'PLUS', 'MINUS'),
    -    ('left', 'TIMES', 'DIVIDE'),
    -    ('right', 'UMINUS'),            # Unary minus operator
    -)
    -
    -
    - -

    -If you do this, the occurrence of input text such as a < b < c will result in a syntax error. However, simple -expressions such as a < b will still be fine. - -
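-To make this concrete, here is a sketch of a grammar rule that uses these nonassociative tokens; the rule body and the node tuple are illustrative, not part of the original example: -
-
-def p_expression_compare(p):
-    '''expression : expression LESSTHAN expression
-                  | expression GREATERTHAN expression'''
-    # Because LESSTHAN/GREATERTHAN are declared 'nonassoc', input such as
-    # "a < b < c" is rejected with a syntax error instead of chaining.
-    p[0] = ('compare', p[2], p[1], p[3])
-
-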

-Reduce/reduce conflicts arise when there are multiple grammar rules that can be applied to the same sequence of symbols. This kind of conflict is almost always bad and is always resolved by picking the rule that appears first in the grammar file. Such conflicts typically occur when different sets of grammar rules somehow generate the same set of symbols. For example: -

    -
    -assignment :  ID EQUALS NUMBER
    -           |  ID EQUALS expression
    -           
    -expression : expression PLUS expression
    -           | expression MINUS expression
    -           | expression TIMES expression
    -           | expression DIVIDE expression
    -           | LPAREN expression RPAREN
    -           | NUMBER
    -
    -
    - -In this case, a reduce/reduce conflict exists between these two rules: - -
    -
    -assignment  : ID EQUALS NUMBER
    -expression  : NUMBER
    -
    -
    - -For example, if you wrote "a = 5", the parser can't figure out if this -is supposed to be reduced as assignment : ID EQUALS NUMBER or -whether it's supposed to reduce the 5 as an expression and then reduce -the rule assignment : ID EQUALS expression. - -

-It should be noted that reduce/reduce conflicts are notoriously difficult to spot simply by looking at the input grammar. When a reduce/reduce conflict occurs, yacc() will try to help by printing a warning message such as this: -

    -
    -WARNING: 1 reduce/reduce conflict
    -WARNING: reduce/reduce conflict in state 15 resolved using rule (assignment -> ID EQUALS NUMBER)
    -WARNING: rejected rule (expression -> NUMBER)
    -
    -
    - -This message identifies the two rules that are in conflict. However, -it may not tell you how the parser arrived at such a state. To try -and figure it out, you'll probably have to look at your grammar and -the contents of the -parser.out debugging file with an appropriately high level of -caffeination. - -
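-Once you have identified the offending rules, the usual fix is to remove the redundancy so that only one reduction is possible. For the grammar above, one way to do it (a sketch; the node tuple is illustrative) is to drop the overly specific rule and let expression derive the number: -
-
-def p_assignment(p):
-    'assignment : ID EQUALS expression'
-    # The rule "assignment : ID EQUALS NUMBER" has been removed, so an input
-    # such as "a = 5" is now reduced unambiguously via expression -> NUMBER.
-    p[0] = ('assign', p[1], p[3])
-
-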

    6.7 The parser.out file

    - - -Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR -parsing algorithm. To assist in debugging, yacc.py creates a debugging file called -'parser.out' when it generates the parsing table. The contents of this file look like the following: - -
    -
    -Unused terminals:
    -
    -
    -Grammar
    -
    -Rule 1     expression -> expression PLUS expression
    -Rule 2     expression -> expression MINUS expression
    -Rule 3     expression -> expression TIMES expression
    -Rule 4     expression -> expression DIVIDE expression
    -Rule 5     expression -> NUMBER
    -Rule 6     expression -> LPAREN expression RPAREN
    -
    -Terminals, with rules where they appear
    -
    -TIMES                : 3
    -error                : 
    -MINUS                : 2
    -RPAREN               : 6
    -LPAREN               : 6
    -DIVIDE               : 4
    -PLUS                 : 1
    -NUMBER               : 5
    -
    -Nonterminals, with rules where they appear
    -
    -expression           : 1 1 2 2 3 3 4 4 6 0
    -
    -
    -Parsing method: LALR
    -
    -
    -state 0
    -
    -    S' -> . expression
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 1
    -
    -    S' -> expression .
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    PLUS            shift and go to state 6
    -    MINUS           shift and go to state 5
    -    TIMES           shift and go to state 4
    -    DIVIDE          shift and go to state 7
    -
    -
    -state 2
    -
    -    expression -> LPAREN . expression RPAREN
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 3
    -
    -    expression -> NUMBER .
    -
    -    $               reduce using rule 5
    -    PLUS            reduce using rule 5
    -    MINUS           reduce using rule 5
    -    TIMES           reduce using rule 5
    -    DIVIDE          reduce using rule 5
    -    RPAREN          reduce using rule 5
    -
    -
    -state 4
    -
    -    expression -> expression TIMES . expression
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 5
    -
    -    expression -> expression MINUS . expression
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 6
    -
    -    expression -> expression PLUS . expression
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 7
    -
    -    expression -> expression DIVIDE . expression
    -    expression -> . expression PLUS expression
    -    expression -> . expression MINUS expression
    -    expression -> . expression TIMES expression
    -    expression -> . expression DIVIDE expression
    -    expression -> . NUMBER
    -    expression -> . LPAREN expression RPAREN
    -
    -    NUMBER          shift and go to state 3
    -    LPAREN          shift and go to state 2
    -
    -
    -state 8
    -
    -    expression -> LPAREN expression . RPAREN
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    RPAREN          shift and go to state 13
    -    PLUS            shift and go to state 6
    -    MINUS           shift and go to state 5
    -    TIMES           shift and go to state 4
    -    DIVIDE          shift and go to state 7
    -
    -
    -state 9
    -
    -    expression -> expression TIMES expression .
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    $               reduce using rule 3
    -    PLUS            reduce using rule 3
    -    MINUS           reduce using rule 3
    -    TIMES           reduce using rule 3
    -    DIVIDE          reduce using rule 3
    -    RPAREN          reduce using rule 3
    -
    -  ! PLUS            [ shift and go to state 6 ]
    -  ! MINUS           [ shift and go to state 5 ]
    -  ! TIMES           [ shift and go to state 4 ]
    -  ! DIVIDE          [ shift and go to state 7 ]
    -
    -state 10
    -
    -    expression -> expression MINUS expression .
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    $               reduce using rule 2
    -    PLUS            reduce using rule 2
    -    MINUS           reduce using rule 2
    -    RPAREN          reduce using rule 2
    -    TIMES           shift and go to state 4
    -    DIVIDE          shift and go to state 7
    -
    -  ! TIMES           [ reduce using rule 2 ]
    -  ! DIVIDE          [ reduce using rule 2 ]
    -  ! PLUS            [ shift and go to state 6 ]
    -  ! MINUS           [ shift and go to state 5 ]
    -
    -state 11
    -
    -    expression -> expression PLUS expression .
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    $               reduce using rule 1
    -    PLUS            reduce using rule 1
    -    MINUS           reduce using rule 1
    -    RPAREN          reduce using rule 1
    -    TIMES           shift and go to state 4
    -    DIVIDE          shift and go to state 7
    -
    -  ! TIMES           [ reduce using rule 1 ]
    -  ! DIVIDE          [ reduce using rule 1 ]
    -  ! PLUS            [ shift and go to state 6 ]
    -  ! MINUS           [ shift and go to state 5 ]
    -
    -state 12
    -
    -    expression -> expression DIVIDE expression .
    -    expression -> expression . PLUS expression
    -    expression -> expression . MINUS expression
    -    expression -> expression . TIMES expression
    -    expression -> expression . DIVIDE expression
    -
    -    $               reduce using rule 4
    -    PLUS            reduce using rule 4
    -    MINUS           reduce using rule 4
    -    TIMES           reduce using rule 4
    -    DIVIDE          reduce using rule 4
    -    RPAREN          reduce using rule 4
    -
    -  ! PLUS            [ shift and go to state 6 ]
    -  ! MINUS           [ shift and go to state 5 ]
    -  ! TIMES           [ shift and go to state 4 ]
    -  ! DIVIDE          [ shift and go to state 7 ]
    -
    -state 13
    -
    -    expression -> LPAREN expression RPAREN .
    -
    -    $               reduce using rule 6
    -    PLUS            reduce using rule 6
    -    MINUS           reduce using rule 6
    -    TIMES           reduce using rule 6
    -    DIVIDE          reduce using rule 6
    -    RPAREN          reduce using rule 6
    -
    -
    - -The different states that appear in this file are a representation of -every possible sequence of valid input tokens allowed by the grammar. -When receiving input tokens, the parser is building up a stack and -looking for matching rules. Each state keeps track of the grammar -rules that might be in the process of being matched at that point. Within each -rule, the "." character indicates the current location of the parse -within that rule. In addition, the actions for each valid input token -are listed. When a shift/reduce or reduce/reduce conflict arises, -rules not selected are prefixed with an !. For example: - -
    -
    -  ! TIMES           [ reduce using rule 2 ]
    -  ! DIVIDE          [ reduce using rule 2 ]
    -  ! PLUS            [ shift and go to state 6 ]
    -  ! MINUS           [ shift and go to state 5 ]
    -
    -
    - -By looking at these rules (and with a little practice), you can usually track down the source -of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are -bad. However, the only way to be sure that they are resolved correctly is to look at parser.out. - -
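-Keep in mind that parser.out is only written when the tables are built in debugging mode (the default). A sketch of requesting it explicitly and placing the output files in a specific directory (the directory name is arbitrary): -
-
-yacc.yacc(debug=1, outputdir="build")   # writes build/parser.out and build/parsetab.py
-
-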

    6.8 Syntax Error Handling

- - -If you are creating a parser for production use, the handling of syntax errors is important. As a general rule, you don't want a parser to simply throw up its hands and stop at the first sign of trouble. Instead, you want it to report the error, recover if possible, and continue parsing so that all of the errors in the input get reported to the user at once. This is the standard behavior found in compilers for languages such as C, C++, and Java. -

-In PLY, when a syntax error occurs during parsing, the error is immediately detected (i.e., the parser does not read any more tokens beyond the source of the error). However, at this point, the parser enters a recovery mode that can be used to try and continue further parsing. As a general rule, error recovery in LR parsers is a delicate topic that involves ancient rituals and black magic. The recovery mechanism provided by yacc.py is comparable to Unix yacc, so you may want to consult a book like O'Reilly's "Lex and Yacc" for some of the finer details. -

    -When a syntax error occurs, yacc.py performs the following steps: - -

      -
    1. On the first occurrence of an error, the user-defined p_error() function -is called with the offending token as an argument. However, if the syntax error is due to -reaching the end-of-file, p_error() is called with an argument of None. -Afterwards, the parser enters -an "error-recovery" mode in which it will not make future calls to p_error() until it -has successfully shifted at least 3 tokens onto the parsing stack. - -

      -

    2. If no recovery action is taken in p_error(), the offending lookahead token is replaced -with a special error token. - -

      -

    3. If the offending lookahead token is already set to error, the top item of the parsing stack is -deleted. - -

      -

    4. If the entire parsing stack is unwound, the parser enters a restart state and attempts to start -parsing from its initial state. - -

      -

    5. If a grammar rule accepts error as a token, it will be -shifted onto the parsing stack. - -

      -

    6. If the top item of the parsing stack is error, lookahead tokens will be discarded until the -parser can successfully shift a new symbol or reduce a rule involving error. -
    - -
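-Putting the first of these steps into code, a minimal p_error() that merely reports the problem (a sketch; it distinguishes a bad token from an unexpected end-of-file but takes no recovery action) might look like this: -
-
-def p_error(p):
-    if p:
-        print "Syntax error at token", p.type, "on line", p.lineno
-    else:
-        print "Syntax error at end of input"
-
-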

    6.8.1 Recovery and resynchronization with error rules

    - - -The most well-behaved approach for handling syntax errors is to write grammar rules that include the error -token. For example, suppose your language had a grammar rule for a print statement like this: - -
    -
    -def p_statement_print(p):
    -     'statement : PRINT expr SEMI'
    -     ...
    -
    -
    - -To account for the possibility of a bad expression, you might write an additional grammar rule like this: - -
    -
    -def p_statement_print_error(p):
    -     'statement : PRINT error SEMI'
    -     print "Syntax error in print statement. Bad expression"
    -
    -
    -
    - -In this case, the error token will match any sequence of -tokens that might appear up to the first semicolon that is -encountered. Once the semicolon is reached, the rule will be -invoked and the error token will go away. - -

    -This type of recovery is sometimes known as parser resynchronization. -The error token acts as a wildcard for any bad input text and -the token immediately following error acts as a -synchronization token. - -

    -It is important to note that the error token usually does not appear as the last token -on the right in an error rule. For example: - -

    -
    -def p_statement_print_error(p):
    -    'statement : PRINT error'
    -    print "Syntax error in print statement. Bad expression"
    -
    -
    - -This is because the first bad token encountered will cause the rule to -be reduced--which may make it difficult to recover if more bad tokens -immediately follow. - -

    6.8.2 Panic mode recovery

    - - -An alternative error recovery scheme is to enter a panic mode recovery in which tokens are -discarded to a point where the parser might be able to recover in some sensible manner. - -

    -Panic mode recovery is implemented entirely in the p_error() function. For example, this -function starts discarding tokens until it reaches a closing '}'. Then, it restarts the -parser in its initial state. - -

    -
    -def p_error(p):
    -    print "Whoa. You are seriously hosed."
    -    # Read ahead looking for a closing '}'
    -    while 1:
    -        tok = yacc.token()             # Get the next token
    -        if not tok or tok.type == 'RBRACE': break
    -    yacc.restart()
    -
    -
    - -

    -This function simply discards the bad token and tells the parser that the error was ok. - -

    -
    -def p_error(p):
    -    print "Syntax error at token", p.type
    -    # Just discard the token and tell the parser it's okay.
    -    yacc.errok()
    -
    -
    - -

    -Within the p_error() function, three functions are available to control the behavior -of the parser: -

    -

      -
    • yacc.errok(). This resets the parser state so it doesn't think it's in error-recovery -mode. This will prevent an error token from being generated and will reset the internal -error counters so that the next syntax error will call p_error() again. - -

      -

    • yacc.token(). This returns the next token on the input stream. - -

      -

    • yacc.restart(). This discards the entire parsing stack and resets the parser -to its initial state. -
    - -Note: these functions are only available when invoking p_error() and are not available -at any other time. - -

    -To supply the next lookahead token to the parser, p_error() can return a token. This might be -useful if trying to synchronize on special characters. For example: - -

    -
    -def p_error(p):
    -    # Read ahead looking for a terminating ";"
    -    while 1:
    -        tok = yacc.token()             # Get the next token
    -        if not tok or tok.type == 'SEMI': break
    -    yacc.errok()
    -
    -    # Return SEMI to the parser as the next lookahead token
    -    return tok  
    -
    -
    - -

    6.8.3 Signaling an error from a production

    - - -If necessary, a production rule can manually force the parser to enter error recovery. This -is done by raising the SyntaxError exception like this: - -
    -
    -def p_production(p):
    -    'production : some production ...'
    -    raise SyntaxError
    -
    -
    - -The effect of raising SyntaxError is the same as if the last symbol shifted onto the -parsing stack was actually a syntax error. Thus, when you do this, the last symbol shifted is popped off -of the parsing stack and the current lookahead token is set to an error token. The parser -then enters error-recovery mode where it tries to reduce rules that can accept error tokens. -The steps that follow from this point are exactly the same as if a syntax error were detected and -p_error() were called. - -

    -One important aspect of manually setting an error is that the p_error() function will NOT be -called in this case. If you need to issue an error message, make sure you do it in the production that -raises SyntaxError. - -
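-For example, revisiting the print statement rule from above, a sketch that reports the problem itself before forcing error recovery (the too_many_items() check is purely illustrative): -
-
-def p_statement_print(p):
-    'statement : PRINT expr SEMI'
-    if too_many_items(p[2]):       # hypothetical semantic check
-        print "Too many items in print statement at line", p.lineno(1)
-        raise SyntaxError          # enter error recovery; p_error() is NOT called
-    p[0] = ('print', p[2])
-
-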

    -Note: This feature of PLY is meant to mimic the behavior of the YYERROR macro in yacc. - - -

    6.8.4 General comments on error handling

    - - -For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable -technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy -to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want -to discard huge portions of the input text to find a valid restart point. - -

    6.9 Line Number and Position Tracking

    - - -Position tracking is often a tricky problem when writing compilers. -By default, PLY tracks the line number and position of all tokens. -This information is available using the following functions: - -
      -
    • p.lineno(num). Return the line number for symbol num -
    • p.lexpos(num). Return the lexing position for symbol num -
    - -For example: - -
    -
    -def p_expression(p):
    -    'expression : expression PLUS expression'
    -    line   = p.lineno(2)        # line number of the PLUS token
    -    index  = p.lexpos(2)        # Position of the PLUS token
    -
    -
- -As an optional feature, yacc.py can automatically track line numbers and positions for all of the grammar symbols as well. However, this extra tracking requires additional processing and can significantly slow down parsing. Therefore, it must be enabled by passing the tracking=True option to yacc.parse(). For example: -
    -
    -yacc.parse(data,tracking=True)
    -
    -
    - -Once enabled, the lineno() and lexpos() methods work -for all grammar symbols. In addition, two additional methods can be -used: - -
      -
    • p.linespan(num). Return a tuple (startline,endline) with the starting and ending line number for symbol num. -
    • p.lexspan(num). Return a tuple (start,end) with the starting and ending positions for symbol num. -
    - -For example: - -
    -
    -def p_expression(p):
    -    'expression : expression PLUS expression'
    -    p.lineno(1)        # Line number of the left expression
    -    p.lineno(2)        # line number of the PLUS operator
    -    p.lineno(3)        # line number of the right expression
    -    ...
    -    start,end = p.linespan(3)    # Start,end lines of the right expression
    -    starti,endi = p.lexspan(3)   # Start,end positions of right expression
    -
    -
    -
    - -Note: The lexspan() function only returns the range of values up to the start of the last grammar symbol. - -

    -Although it may be convenient for PLY to track position information on -all grammar symbols, this is often unnecessary. For example, if you -are merely using line number information in an error message, you can -often just key off of a specific token in the grammar rule. For -example: - -

    -
    -def p_bad_func(p):
    -    'funccall : fname LPAREN error RPAREN'
    -    # Line number reported from LPAREN token
    -    print "Bad function call at line", p.lineno(2)
    -
    -
    - -

    -Similarly, you may get better parsing performance if you only -selectively propagate line number information where it's needed using -the p.set_lineno() method. For example: - -

    -
    -def p_fname(p):
    -    'fname : ID'
    -    p[0] = p[1]
    -    p.set_lineno(0,p.lineno(1))
    -
    -
    - -PLY doesn't retain line number information from rules that have already been -parsed. If you are building an abstract syntax tree and need to have line numbers, -you should make sure that the line numbers appear in the tree itself. - -
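-A minimal sketch of that idea, using the tuple-style nodes from the next section, is to record the operator's line number directly in the node when the rule is reduced: -
-
-def p_expression_binop(p):
-    'expression : expression PLUS expression'
-    # Store the PLUS token's line number in the node itself so that it is
-    # still available after this rule has been reduced.
-    p[0] = ('binop', '+', p[1], p[3], p.lineno(2))
-
-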

    6.10 AST Construction

    - - -yacc.py provides no special functions for constructing an -abstract syntax tree. However, such construction is easy enough to do -on your own. - -

    A minimal way to construct a tree is to simply create and -propagate a tuple or list in each grammar rule function. There -are many possible ways to do this, but one example would be something -like this: - -

    -
    -def p_expression_binop(p):
    -    '''expression : expression PLUS expression
    -                  | expression MINUS expression
    -                  | expression TIMES expression
    -                  | expression DIVIDE expression'''
    -
    -    p[0] = ('binary-expression',p[2],p[1],p[3])
    -
    -def p_expression_group(p):
    -    'expression : LPAREN expression RPAREN'
    -    p[0] = ('group-expression',p[2])
    -
    -def p_expression_number(p):
    -    'expression : NUMBER'
    -    p[0] = ('number-expression',p[1])
    -
    -
    - -

-Another approach is to create a set of data structures for the different kinds of abstract syntax tree nodes and assign a node to p[0] in each rule. For example: -

    -
    -class Expr: pass
    -
    -class BinOp(Expr):
    -    def __init__(self,left,op,right):
    -        self.type = "binop"
    -        self.left = left
    -        self.right = right
    -        self.op = op
    -
    -class Number(Expr):
    -    def __init__(self,value):
    -        self.type = "number"
    -        self.value = value
    -
    -def p_expression_binop(p):
    -    '''expression : expression PLUS expression
    -                  | expression MINUS expression
    -                  | expression TIMES expression
    -                  | expression DIVIDE expression'''
    -
    -    p[0] = BinOp(p[1],p[2],p[3])
    -
    -def p_expression_group(p):
    -    'expression : LPAREN expression RPAREN'
    -    p[0] = p[2]
    -
    -def p_expression_number(p):
    -    'expression : NUMBER'
    -    p[0] = Number(p[1])
    -
    -
- -The advantage of this approach is that it may make it easier to attach more complicated semantics, type checking, code generation, and other features to the node classes. -

    -To simplify tree traversal, it may make sense to pick a very generic -tree structure for your parse tree nodes. For example: - -

    -
    -class Node:
    -    def __init__(self,type,children=None,leaf=None):
    -         self.type = type
    -         if children:
    -              self.children = children
    -         else:
    -              self.children = [ ]
    -         self.leaf = leaf
    -	 
    -def p_expression_binop(p):
    -    '''expression : expression PLUS expression
    -                  | expression MINUS expression
    -                  | expression TIMES expression
    -                  | expression DIVIDE expression'''
    -
    -    p[0] = Node("binop", [p[1],p[3]], p[2])
    -
    -
    - -
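-One benefit of such a uniform layout is that a single routine can walk the entire tree. A minimal sketch, assuming every rule builds a Node and that all children are themselves Node objects: -
-
-def print_tree(node, indent=0):
-    # Pre-order traversal of the generic Node structure shown above
-    print " "*indent + node.type, node.leaf if node.leaf is not None else ""
-    for child in node.children:
-        print_tree(child, indent + 4)
-
-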

    6.11 Embedded Actions

    - - -The parsing technique used by yacc only allows actions to be executed at the end of a rule. For example, -suppose you have a rule like this: - -
    -
    -def p_foo(p):
    -    "foo : A B C D"
    -    print "Parsed a foo", p[1],p[2],p[3],p[4]
    -
    -
    - -

    -In this case, the supplied action code only executes after all of the -symbols A, B, C, and D have been -parsed. Sometimes, however, it is useful to execute small code -fragments during intermediate stages of parsing. For example, suppose -you wanted to perform some action immediately after A has -been parsed. To do this, write an empty rule like this: - -

    -
    -def p_foo(p):
    -    "foo : A seen_A B C D"
    -    print "Parsed a foo", p[1],p[3],p[4],p[5]
    -    print "seen_A returned", p[2]
    -
    -def p_seen_A(p):
    -    "seen_A :"
    -    print "Saw an A = ", p[-1]   # Access grammar symbol to left
    -    p[0] = some_value            # Assign value to seen_A
    -
    -
    -
    - -

-In this example, the empty seen_A rule executes immediately after A is shifted onto the parsing stack. Within this rule, p[-1] refers to the symbol on the stack that appears immediately to the left of the seen_A symbol. In this case, it would be the value of A in the foo rule immediately above. Like other rules, a value can be returned from an embedded action by simply assigning it to p[0]. -

    -The use of embedded actions can sometimes introduce extra shift/reduce conflicts. For example, -this grammar has no conflicts: - -

    -
    -def p_foo(p):
    -    """foo : abcd
    -           | abcx"""
    -
    -def p_abcd(p):
    -    "abcd : A B C D"
    -
    -def p_abcx(p):
    -    "abcx : A B C X"
    -
    -
    - -However, if you insert an embedded action into one of the rules like this, - -
    -
    -def p_foo(p):
    -    """foo : abcd
    -           | abcx"""
    -
    -def p_abcd(p):
    -    "abcd : A B C D"
    -
    -def p_abcx(p):
    -    "abcx : A B seen_AB C X"
    -
    -def p_seen_AB(p):
    -    "seen_AB :"
    -
    -
    - -an extra shift-reduce conflict will be introduced. This conflict is -caused by the fact that the same symbol C appears next in -both the abcd and abcx rules. The parser can either -shift the symbol (abcd rule) or reduce the empty -rule seen_AB (abcx rule). - -

    -A common use of embedded rules is to control other aspects of parsing -such as scoping of local variables. For example, if you were parsing C code, you might -write code like this: - -

    -
    -def p_statements_block(p):
    -    "statements: LBRACE new_scope statements RBRACE"""
    -    # Action code
    -    ...
    -    pop_scope()        # Return to previous scope
    -
    -def p_new_scope(p):
    -    "new_scope :"
    -    # Create a new scope for local variables
    -    s = new_scope()
    -    push_scope(s)
    -    ...
    -
    -
    - -In this case, the embedded action new_scope executes -immediately after a LBRACE ({) symbol is parsed. -This might adjust internal symbol tables and other aspects of the -parser. Upon completion of the rule statements_block, code -might undo the operations performed in the embedded action -(e.g., pop_scope()). - -

    6.12 Miscellaneous Yacc Notes

    - - -
      -
    • The default parsing method is LALR. To use SLR instead, run yacc() as follows: - -
      -
      -yacc.yacc(method="SLR")
      -
      -
      -Note: LALR table generation takes approximately twice as long as SLR table generation. There is no -difference in actual parsing performance---the same code is used in both cases. LALR is preferred when working -with more complicated grammars since it is more powerful. - -

      - -

    • By default, yacc.py relies on lex.py for tokenizing. However, an alternative tokenizer -can be supplied as follows: - -
      -
      -yacc.parse(lexer=x)
      -
      -
-In this case, x must be a Lexer object that minimally has an x.token() method for retrieving the next token. If an input string is given to yacc.parse(), the lexer must also have an x.input() method. -

      -

• By default, yacc generates its tables in debugging mode (which produces the parser.out file and other output). To disable this, use -
      -
      -yacc.yacc(debug=0)
      -
      -
      - -

      -

    • To change the name of the parsetab.py file, use: - -
      -
      -yacc.yacc(tabmodule="foo")
      -
      -
      - -

      -

    • To change the directory in which the parsetab.py file (and other output files) are written, use: -
      -
      -yacc.yacc(tabmodule="foo",outputdir="somedirectory")
      -
      -
      - -

      -

    • To prevent yacc from generating any kind of parser table file, use: -
      -
      -yacc.yacc(write_tables=0)
      -
      -
-Note: If you disable table generation, yacc() will regenerate the parsing tables each time it runs (which may take a while depending on how large your grammar is). -

      -

    • To print copious amounts of debugging during parsing, use: - -
      -
      -yacc.parse(debug=1)     
      -
      -
      - -

      -

    • The yacc.yacc() function really returns a parser object. If you want to support multiple -parsers in the same application, do this: - -
      -
      -p = yacc.yacc()
      -...
      -p.parse()
      -
      -
      - -Note: The function yacc.parse() is bound to the last parser that was generated. - -

      -

    • Since the generation of the LALR tables is relatively expensive, previously generated tables are -cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5 -checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated. - -

-It should be noted that table generation is reasonably efficient, even for grammars that involve around 100 rules and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow machine. Please be patient. -

      -

    • Since LR parsing is driven by tables, the performance of the parser is largely independent of the -size of the grammar. The biggest bottlenecks will be the lexer and the complexity of the code in your grammar rules. -
    - -

    7. Multiple Parsers and Lexers

    - - -In advanced parsing applications, you may want to have multiple -parsers and lexers. - -

-As a general rule, this isn't a problem. However, to make it work, you need to carefully make sure everything gets hooked up correctly. First, make sure you save the objects returned by lex() and yacc(). For example: -

    -
    -lexer  = lex.lex()       # Return lexer object
    -parser = yacc.yacc()     # Return parser object
    -
    -
    - -Next, when parsing, make sure you give the parse() function a reference to the lexer it -should be using. For example: - -
    -
    -parser.parse(text,lexer=lexer)
    -
    -
    - -If you forget to do this, the parser will use the last lexer -created--which is not always what you want. - -

    -Within lexer and parser rule functions, these objects are also -available. In the lexer, the "lexer" attribute of a token refers to -the lexer object that triggered the rule. For example: - -

    -
    -def t_NUMBER(t):
    -   r'\d+'
    -   ...
    -   print t.lexer           # Show lexer object
    -
    -
    - -In the parser, the "lexer" and "parser" attributes refer to the lexer -and parser objects respectively. - -
    -
    -def p_expr_plus(p):
    -   'expr : expr PLUS expr'
    -   ...
    -   print p.parser          # Show parser object
    -   print p.lexer           # Show lexer object
    -
    -
    - -If necessary, arbitrary attributes can be attached to the lexer or parser object. -For example, if you wanted to have different parsing modes, you could attach a mode -attribute to the parser object and look at it later. - -
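-A minimal sketch of that idea follows; the mode attribute, its values, and the check_types() helper are purely illustrative: -
-
-def p_statement_expr(p):
-    'statement : expression SEMI'
-    # Consult an application-defined attribute attached to the parser object
-    if p.parser.mode == "strict":
-        check_types(p[1])            # hypothetical extra checking
-    p[0] = p[1]
-
-parser = yacc.yacc()
-parser.mode = "strict"               # attach an arbitrary attribute
-result = parser.parse(data, lexer=lexer)
-
-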

    8. Using Python's Optimized Mode

    - - -Because PLY uses information from doc-strings, parsing and lexing -information must be gathered while running the Python interpreter in -normal mode (i.e., not with the -O or -OO options). However, if you -specify optimized mode like this: - -
    -
    -lex.lex(optimize=1)
    -yacc.yacc(optimize=1)
    -
    -
    - -then PLY can later be used when Python runs in optimized mode. To make this work, -make sure you first run Python in normal mode. Once the lexing and parsing tables -have been generated the first time, run Python in optimized mode. PLY will use -the tables without the need for doc strings. - -

    -Beware: running PLY in optimized mode disables a lot of error -checking. You should only do this when your project has stabilized -and you don't need to do any debugging. One of the purposes of -optimized mode is to substantially decrease the startup time of -your compiler (by assuming that everything is already properly -specified and works). - -

    9. Advanced Debugging

    - - -

-Debugging a compiler is typically not an easy task. PLY provides some advanced diagnostic capabilities through the use of Python's logging module. The next two sections describe this: -

    9.1 Debugging the lex() and yacc() commands

    - - -

    -Both the lex() and yacc() commands have a debugging -mode that can be enabled using the debug flag. For example: - -

    -
    -lex.lex(debug=True)
    -yacc.yacc(debug=True)
    -
    -
    - -Normally, the output produced by debugging is routed to either -standard error or, in the case of yacc(), to a file -parser.out. This output can be more carefully controlled -by supplying a logging object. Here is an example that adds -information about where different debugging messages are coming from: - -
    -
    -# Set up a logging object
    -import logging
    -logging.basicConfig(
    -    level = logging.DEBUG,
    -    filename = "parselog.txt",
    -    filemode = "w",
    -    format = "%(filename)10s:%(lineno)4d:%(message)s"
    -)
    -log = logging.getLogger()
    -
    -lex.lex(debug=True,debuglog=log)
    -yacc.yacc(debug=True,debuglog=log)
    -
    -
    - -If you supply a custom logger, the amount of debugging -information produced can be controlled by setting the logging level. -Typically, debugging messages are either issued at the DEBUG, -INFO, or WARNING levels. - -
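-For instance, a sketch that keeps table construction quiet except for warnings (it assumes a logging configuration such as the one shown above): -
-
-log = logging.getLogger()
-log.setLevel(logging.WARNING)        # suppress DEBUG/INFO messages from table generation
-yacc.yacc(debug=True, debuglog=log)
-
-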

    -PLY's error messages and warnings are also produced using the logging -interface. This can be controlled by passing a logging object -using the errorlog parameter. - -

    -
    -lex.lex(errorlog=log)
    -yacc.yacc(errorlog=log)
    -
    -
    - -If you want to completely silence warnings, you can either pass in a -logging object with an appropriate filter level or use the NullLogger -object defined in either lex or yacc. For example: - -
    -
    -yacc.yacc(errorlog=yacc.NullLogger())
    -
    -
    - -

    9.2 Run-time Debugging

    - - -

-To enable run-time debugging of a parser, use the debug option to parse(). This option can either be an integer (which simply turns debugging on or off) or an instance of a logger object. For example: -

    -
    -log = logging.getLogger()
    -parser.parse(input,debug=log)
    -
    -
    - -If a logging object is passed, you can use its filtering level to control how much -output gets generated. The INFO level is used to produce information -about rule reductions. The DEBUG level will show information about the -parsing stack, token shifts, and other details. The ERROR level shows information -related to parsing errors. - -

    -For very complicated problems, you should pass in a logging object that -redirects to a file where you can more easily inspect the output after -execution. - -
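-A minimal sketch of that setup (the file name is arbitrary): -
-
-import logging
-logging.basicConfig(
-    level    = logging.DEBUG,
-    filename = "parsedebug.txt",
-    filemode = "w"
-)
-log = logging.getLogger()
-result = parser.parse(data, debug=log)
-
-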

    10. Where to go from here?

    - - -The examples directory of the PLY distribution contains several simple examples. Please consult a -compilers textbook for the theory and underlying implementation details or LR parsing. - - - - - - - - - - diff --git a/ply/example/BASIC/README b/ply/example/BASIC/README deleted file mode 100644 index be24a30..0000000 --- a/ply/example/BASIC/README +++ /dev/null @@ -1,79 +0,0 @@ -Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by -David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html), -I thought that a fully working BASIC interpreter might be an interesting, -if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea, -but in any case, here it is. - -In this example, you'll find a rough implementation of 1964 Dartmouth BASIC -as described in the manual at: - - http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf - -See also: - - http://en.wikipedia.org/wiki/Dartmouth_BASIC - -This dialect is downright primitive---there are no string variables -and no facilities for interactive input. Moreover, subroutines and functions -are brain-dead even more than they usually are for BASIC. Of course, -the GOTO statement is provided. - -Nevertheless, there are a few interesting aspects of this example: - - - It illustrates a fully working interpreter including lexing, parsing, - and interpretation of instructions. - - - The parser shows how to catch and report various kinds of parsing - errors in a more graceful way. - - - The example both parses files (supplied on command line) and - interactive input entered line by line. - - - It shows how you might represent parsed information. In this case, - each BASIC statement is encoded into a Python tuple containing the - statement type and parameters. These tuples are then stored in - a dictionary indexed by program line numbers. - - - Even though it's just BASIC, the parser contains more than 80 - rules and 150 parsing states. Thus, it's a little more meaty than - the calculator example. - -To use the example, run it as follows: - - % python basic.py hello.bas - HELLO WORLD - % - -or use it interactively: - - % python basic.py - [BASIC] 10 PRINT "HELLO WORLD" - [BASIC] 20 END - [BASIC] RUN - HELLO WORLD - [BASIC] - -The following files are defined: - - basic.py - High level script that controls everything - basiclex.py - BASIC tokenizer - basparse.py - BASIC parser - basinterp.py - BASIC interpreter that runs parsed programs. - -In addition, a number of sample BASIC programs (.bas suffix) are -provided. These were taken out of the Dartmouth manual. - -Disclaimer: I haven't spent a ton of time testing this and it's likely that -I've skimped here and there on a few finer details (e.g., strictly enforcing -variable naming rules). However, the interpreter seems to be able to run -the examples in the BASIC manual. - -Have fun! - --Dave - - - - - - diff --git a/ply/example/BASIC/basic.py b/ply/example/BASIC/basic.py deleted file mode 100644 index b14483d..0000000 --- a/ply/example/BASIC/basic.py +++ /dev/null @@ -1,71 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import basiclex -import basparse -import basinterp - -# If a filename has been specified, we try to run it. 
-# If a runtime error occurs, we bail out and enter -# interactive mode below -if len(sys.argv) == 2: - data = open(sys.argv[1]).read() - prog = basparse.parse(data) - if not prog: raise SystemExit - b = basinterp.BasicInterpreter(prog) - try: - b.run() - raise SystemExit - except RuntimeError: - pass - -else: - b = basinterp.BasicInterpreter({}) - -# Interactive mode. This incrementally adds/deletes statements -# from the program stored in the BasicInterpreter object. In -# addition, special commands 'NEW','LIST',and 'RUN' are added. -# Specifying a line number with no code deletes that line from -# the program. - -while 1: - try: - line = raw_input("[BASIC] ") - except EOFError: - raise SystemExit - if not line: continue - line += "\n" - prog = basparse.parse(line) - if not prog: continue - - keys = list(prog) - if keys[0] > 0: - b.add_statements(prog) - else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - diff --git a/ply/example/BASIC/basiclex.py b/ply/example/BASIC/basiclex.py deleted file mode 100644 index 3d27cde..0000000 --- a/ply/example/BASIC/basiclex.py +++ /dev/null @@ -1,74 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) - -from ply import * - -keywords = ( - 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP', - 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW', -) - -tokens = keywords + ( - 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER', - 'LPAREN','RPAREN','LT','LE','GT','GE','NE', - 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING', - 'ID','NEWLINE' -) - -t_ignore = ' \t' - -def t_REM(t): - r'REM .*' - return t - -def t_ID(t): - r'[A-Z][A-Z0-9]*' - if t.value in keywords: - t.type = t.value - return t - -t_EQUALS = r'=' -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_POWER = r'\^' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LT = r'<' -t_LE = r'<=' -t_GT = r'>' -t_GE = r'>=' -t_NE = r'<>' -t_COMMA = r'\,' -t_SEMI = r';' -t_INTEGER = r'\d+' -t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' -t_STRING = r'\".*?\"' - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - return t - -def t_error(t): - print("Illegal character %s" % t.value[0]) - t.lexer.skip(1) - -lex.lex(debug=0) - - - - - - - - - - - - - - - - - diff --git a/ply/example/BASIC/basiclog.py b/ply/example/BASIC/basiclog.py deleted file mode 100644 index ccfd7b9..0000000 --- a/ply/example/BASIC/basiclog.py +++ /dev/null @@ -1,79 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import logging -logging.basicConfig( - level = logging.INFO, - filename = "parselog.txt", - filemode = "w" -) -log = logging.getLogger() - -import basiclex -import basparse -import basinterp - -# If a filename has been specified, we try to run it. -# If a runtime error occurs, we bail out and enter -# interactive mode below -if len(sys.argv) == 2: - data = open(sys.argv[1]).read() - prog = basparse.parse(data,debug=log) - if not prog: raise SystemExit - b = basinterp.BasicInterpreter(prog) - try: - b.run() - raise SystemExit - except RuntimeError: - pass - -else: - b = basinterp.BasicInterpreter({}) - -# Interactive mode. This incrementally adds/deletes statements -# from the program stored in the BasicInterpreter object. In -# addition, special commands 'NEW','LIST',and 'RUN' are added. 
-# Specifying a line number with no code deletes that line from -# the program. - -while 1: - try: - line = raw_input("[BASIC] ") - except EOFError: - raise SystemExit - if not line: continue - line += "\n" - prog = basparse.parse(line,debug=log) - if not prog: continue - - keys = list(prog) - if keys[0] > 0: - b.add_statements(prog) - else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - diff --git a/ply/example/BASIC/basinterp.py b/ply/example/BASIC/basinterp.py deleted file mode 100644 index 3e8a777..0000000 --- a/ply/example/BASIC/basinterp.py +++ /dev/null @@ -1,441 +0,0 @@ -# This file provides the runtime support for running a basic program -# Assumes the program has been parsed using basparse.py - -import sys -import math -import random - -class BasicInterpreter: - - # Initialize the interpreter. prog is a dictionary - # containing (line,statement) mappings - def __init__(self,prog): - self.prog = prog - - self.functions = { # Built-in function table - 'SIN' : lambda z: math.sin(self.eval(z)), - 'COS' : lambda z: math.cos(self.eval(z)), - 'TAN' : lambda z: math.tan(self.eval(z)), - 'ATN' : lambda z: math.atan(self.eval(z)), - 'EXP' : lambda z: math.exp(self.eval(z)), - 'ABS' : lambda z: abs(self.eval(z)), - 'LOG' : lambda z: math.log(self.eval(z)), - 'SQR' : lambda z: math.sqrt(self.eval(z)), - 'INT' : lambda z: int(self.eval(z)), - 'RND' : lambda z: random.random() - } - - # Collect all data statements - def collect_data(self): - self.data = [] - for lineno in self.stat: - if self.prog[lineno][0] == 'DATA': - self.data = self.data + self.prog[lineno][1] - self.dc = 0 # Initialize the data counter - - # Check for end statements - def check_end(self): - has_end = 0 - for lineno in self.stat: - if self.prog[lineno][0] == 'END' and not has_end: - has_end = lineno - if not has_end: - print("NO END INSTRUCTION") - self.error = 1 - return - if has_end != lineno: - print("END IS NOT LAST") - self.error = 1 - - # Check loops - def check_loops(self): - for pc in range(len(self.stat)): - lineno = self.stat[pc] - if self.prog[lineno][0] == 'FOR': - forinst = self.prog[lineno] - loopvar = forinst[1] - for i in range(pc+1,len(self.stat)): - if self.prog[self.stat[i]][0] == 'NEXT': - nextvar = self.prog[self.stat[i]][1] - if nextvar != loopvar: continue - self.loopend[pc] = i - break - else: - print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc]) - self.error = 1 - - # Evaluate an expression - def eval(self,expr): - etype = expr[0] - if etype == 'NUM': return expr[1] - elif etype == 'GROUP': return self.eval(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return -self.eval(expr[2]) - elif etype == 'BINOP': - if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3]) - elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3]) - elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3]) - elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3]) - elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3]) - elif etype == 'VAR': - var,dim1,dim2 = expr[1] - if not dim1 and not dim2: - if var in self.vars: - return self.vars[var] - else: - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError - # May be a list lookup or a function evaluation - if dim1 and not dim2: - if var in self.functions: - # A function - return 
self.functions[var](dim1) - else: - # A list evaluation - if var in self.lists: - dim1val = self.eval(dim1) - if dim1val < 1 or dim1val > len(self.lists[var]): - print("LIST INDEX OUT OF BOUNDS AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.lists[var][dim1val-1] - if dim1 and dim2: - if var in self.tables: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): - print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.tables[var][dim1val-1][dim2val-1] - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError - - # Evaluate a relational expression - def releval(self,expr): - etype = expr[1] - lhs = self.eval(expr[2]) - rhs = self.eval(expr[3]) - if etype == '<': - if lhs < rhs: return 1 - else: return 0 - - elif etype == '<=': - if lhs <= rhs: return 1 - else: return 0 - - elif etype == '>': - if lhs > rhs: return 1 - else: return 0 - - elif etype == '>=': - if lhs >= rhs: return 1 - else: return 0 - - elif etype == '=': - if lhs == rhs: return 1 - else: return 0 - - elif etype == '<>': - if lhs != rhs: return 1 - else: return 0 - - # Assignment - def assign(self,target,value): - var, dim1, dim2 = target - if not dim1 and not dim2: - self.vars[var] = self.eval(value) - elif dim1 and not dim2: - # List assignment - dim1val = self.eval(dim1) - if not var in self.lists: - self.lists[var] = [0]*10 - - if dim1val > len(self.lists[var]): - print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.lists[var][dim1val-1] = self.eval(value) - elif dim1 and dim2: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if not var in self.tables: - temp = [0]*10 - v = [] - for i in range(10): v.append(temp[:]) - self.tables[var] = v - # Variable already exists - if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): - print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.tables[var][dim1val-1][dim2val-1] = self.eval(value) - - # Change the current line number - def goto(self,linenum): - if not linenum in self.prog: - print("UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc])) - raise RuntimeError - self.pc = self.stat.index(linenum) - - # Run it - def run(self): - self.vars = { } # All variables - self.lists = { } # List variables - self.tables = { } # Tables - self.loops = [ ] # Currently active loops - self.loopend= { } # Mapping saying where loops end - self.gosub = None # Gosub return point (if any) - self.error = 0 # Indicates program error - - self.stat = list(self.prog) # Ordered list of all line numbers - self.stat.sort() - self.pc = 0 # Current program counter - - # Processing prior to running - - self.collect_data() # Collect all of the data statements - self.check_end() - self.check_loops() - - if self.error: raise RuntimeError - - while 1: - line = self.stat[self.pc] - instr = self.prog[line] - - op = instr[0] - - # END and STOP statements - if op == 'END' or op == 'STOP': - break # We're done - - # GOTO statement - elif op == 'GOTO': - newline = instr[1] - self.goto(newline) - continue - - # PRINT statement - elif op == 'PRINT': - plist = instr[1] - out = "" - for label,val in plist: - if out: - out += ' '*(15 - (len(out) % 15)) - out += label - if val: - if label: out += " " - eval = self.eval(val) - out += str(eval) - sys.stdout.write(out) - end = instr[2] - if not (end == ',' or 
end == ';'): - sys.stdout.write("\n") - if end == ',': sys.stdout.write(" "*(15-(len(out) % 15))) - if end == ';': sys.stdout.write(" "*(3-(len(out) % 3))) - - # LET statement - elif op == 'LET': - target = instr[1] - value = instr[2] - self.assign(target,value) - - # READ statement - elif op == 'READ': - for target in instr[1]: - if self.dc < len(self.data): - value = ('NUM',self.data[self.dc]) - self.assign(target,value) - self.dc += 1 - else: - # No more data. Program ends - return - elif op == 'IF': - relop = instr[1] - newline = instr[2] - if (self.releval(relop)): - self.goto(newline) - continue - - elif op == 'FOR': - loopvar = instr[1] - initval = instr[2] - finval = instr[3] - stepval = instr[4] - - # Check to see if this is a new loop - if not self.loops or self.loops[-1][0] != self.pc: - # Looks like a new loop. Make the initial assignment - newvalue = initval - self.assign((loopvar,None,None),initval) - if not stepval: stepval = ('NUM',1) - stepval = self.eval(stepval) # Evaluate step here - self.loops.append((self.pc,stepval)) - else: - # It's a repeat of the previous loop - # Update the value of the loop variable according to the step - stepval = ('NUM',self.loops[-1][1]) - newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval) - - if self.loops[-1][1] < 0: relop = '>=' - else: relop = '<=' - if not self.releval(('RELOP',relop,newvalue,finval)): - # Loop is done. Jump to the NEXT - self.pc = self.loopend[self.pc] - self.loops.pop() - else: - self.assign((loopvar,None,None),newvalue) - - elif op == 'NEXT': - if not self.loops: - print("NEXT WITHOUT FOR AT LINE %s" % line) - return - - nextvar = instr[1] - self.pc = self.loops[-1][0] - loopinst = self.prog[self.stat[self.pc]] - forvar = loopinst[1] - if nextvar != forvar: - print("NEXT DOESN'T MATCH FOR AT LINE %s" % line) - return - continue - elif op == 'GOSUB': - newline = instr[1] - if self.gosub: - print("ALREADY IN A SUBROUTINE AT LINE %s" % line) - return - self.gosub = self.stat[self.pc] - self.goto(newline) - continue - - elif op == 'RETURN': - if not self.gosub: - print("RETURN WITHOUT A GOSUB AT LINE %s" % line) - return - self.goto(self.gosub) - self.gosub = None - - elif op == 'FUNC': - fname = instr[1] - pname = instr[2] - expr = instr[3] - def eval_func(pvalue,name=pname,self=self,expr=expr): - self.assign((pname,None,None),pvalue) - return self.eval(expr) - self.functions[fname] = eval_func - - elif op == 'DIM': - for vname,x,y in instr[1]: - if y == 0: - # Single dimension variable - self.lists[vname] = [0]*x - else: - # Double dimension variable - temp = [0]*y - v = [] - for i in range(x): - v.append(temp[:]) - self.tables[vname] = v - - self.pc += 1 - - # Utility functions for program listing - def expr_str(self,expr): - etype = expr[0] - if etype == 'NUM': return str(expr[1]) - elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return "-"+str(expr[2]) - elif etype == 'BINOP': - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - elif etype == 'VAR': - return self.var_str(expr[1]) - - def relexpr_str(self,expr): - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - - def var_str(self,var): - varname,dim1,dim2 = var - if not dim1 and not dim2: return varname - if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1)) - return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2)) - - # Create a program listing - def list(self): - stat = list(self.prog) # Ordered list of all 
line numbers - stat.sort() - for line in stat: - instr = self.prog[line] - op = instr[0] - if op in ['END','STOP','RETURN']: - print("%s %s" % (line, op)) - continue - elif op == 'REM': - print("%s %s" % (line, instr[1])) - elif op == 'PRINT': - _out = "%s %s " % (line, op) - first = 1 - for p in instr[1]: - if not first: _out += ", " - if p[0] and p[1]: _out += '"%s"%s' % (p[0],self.expr_str(p[1])) - elif p[1]: _out += self.expr_str(p[1]) - else: _out += '"%s"' % (p[0],) - first = 0 - if instr[2]: _out += instr[2] - print(_out) - elif op == 'LET': - print("%s LET %s = %s" % (line,self.var_str(instr[1]),self.expr_str(instr[2]))) - elif op == 'READ': - _out = "%s READ " % line - first = 1 - for r in instr[1]: - if not first: _out += "," - _out += self.var_str(r) - first = 0 - print(_out) - elif op == 'IF': - print("%s IF %s THEN %d" % (line,self.relexpr_str(instr[1]),instr[2])) - elif op == 'GOTO' or op == 'GOSUB': - print("%s %s %s" % (line, op, instr[1])) - elif op == 'FOR': - _out = "%s FOR %s = %s TO %s" % (line,instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])) - if instr[4]: _out += " STEP %s" % (self.expr_str(instr[4])) - print(_out) - elif op == 'NEXT': - print("%s NEXT %s" % (line, instr[1])) - elif op == 'FUNC': - print("%s DEF %s(%s) = %s" % (line,instr[1],instr[2],self.expr_str(instr[3]))) - elif op == 'DIM': - _out = "%s DIM " % line - first = 1 - for vname,x,y in instr[1]: - if not first: _out += "," - first = 0 - if y == 0: - _out += "%s(%d)" % (vname,x) - else: - _out += "%s(%d,%d)" % (vname,x,y) - - print(_out) - elif op == 'DATA': - _out = "%s DATA " % line - first = 1 - for v in instr[1]: - if not first: _out += "," - first = 0 - _out += v - print(_out) - - # Erase the current program - def new(self): - self.prog = {} - - # Insert statements - def add_statements(self,prog): - for line,stat in prog.items(): - self.prog[line] = stat - - # Delete a statement - def del_line(self,lineno): - try: - del self.prog[lineno] - except KeyError: - pass - diff --git a/ply/example/BASIC/basparse.py b/ply/example/BASIC/basparse.py deleted file mode 100644 index ccdeb16..0000000 --- a/ply/example/BASIC/basparse.py +++ /dev/null @@ -1,424 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -from ply import * -import basiclex - -tokens = basiclex.tokens - -precedence = ( - ('left', 'PLUS','MINUS'), - ('left', 'TIMES','DIVIDE'), - ('left', 'POWER'), - ('right','UMINUS') -) - -#### A BASIC program is a series of statements. We represent the program as a -#### dictionary of tuples indexed by line number. - -def p_program(p): - '''program : program statement - | statement''' - - if len(p) == 2 and p[1]: - p[0] = { } - line,stat = p[1] - p[0][line] = stat - elif len(p) ==3: - p[0] = p[1] - if not p[0]: p[0] = { } - if p[2]: - line,stat = p[2] - p[0][line] = stat - -#### This catch-all rule is used for any catastrophic errors. In this case, -#### we simply return nothing - -def p_program_error(p): - '''program : error''' - p[0] = None - p.parser.error = 1 - -#### Format of all BASIC statements. - -def p_statement(p): - '''statement : INTEGER command NEWLINE''' - if isinstance(p[2],str): - print("%s %s %s" % (p[2],"AT LINE", p[1])) - p[0] = None - p.parser.error = 1 - else: - lineno = int(p[1]) - p[0] = (lineno,p[2]) - -#### Interactive statements. 
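The grammar here builds exactly the structure the interpreter above consumes: a dictionary keyed by BASIC line number whose values are tuples beginning with the statement name. A toy dispatcher over that shape (a hypothetical helper, far simpler than the real run loop, which also tracks FOR/NEXT stacks, GOSUB returns, DATA, and arrays) shows how execution walks the sorted line numbers:

# Toy sketch of executing a {lineno: (op, args...)} program dictionary.
def run(prog):
    lines = sorted(prog)                # execution order = sorted line numbers
    pc = 0
    while pc < len(lines):
        op, *args = prog[lines[pc]]
        if op == 'END':
            return
        elif op == 'PRINT':
            print(args[0])
        elif op == 'GOTO':
            pc = lines.index(args[0])   # jump to the slot holding the target line
            continue
        pc += 1

run({10: ('PRINT', 'HELLO WORLD'), 20: ('GOTO', 40),
     30: ('PRINT', 'never reached'), 40: ('END',)})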
- -def p_statement_interactive(p): - '''statement : RUN NEWLINE - | LIST NEWLINE - | NEW NEWLINE''' - p[0] = (0, (p[1],0)) - -#### Blank line number -def p_statement_blank(p): - '''statement : INTEGER NEWLINE''' - p[0] = (0,('BLANK',int(p[1]))) - -#### Error handling for malformed statements - -def p_statement_bad(p): - '''statement : INTEGER error NEWLINE''' - print("MALFORMED STATEMENT AT LINE %s" % p[1]) - p[0] = None - p.parser.error = 1 - -#### Blank line - -def p_statement_newline(p): - '''statement : NEWLINE''' - p[0] = None - -#### LET statement - -def p_command_let(p): - '''command : LET variable EQUALS expr''' - p[0] = ('LET',p[2],p[4]) - -def p_command_let_bad(p): - '''command : LET variable EQUALS error''' - p[0] = "BAD EXPRESSION IN LET" - -#### READ statement - -def p_command_read(p): - '''command : READ varlist''' - p[0] = ('READ',p[2]) - -def p_command_read_bad(p): - '''command : READ error''' - p[0] = "MALFORMED VARIABLE LIST IN READ" - -#### DATA statement - -def p_command_data(p): - '''command : DATA numlist''' - p[0] = ('DATA',p[2]) - -def p_command_data_bad(p): - '''command : DATA error''' - p[0] = "MALFORMED NUMBER LIST IN DATA" - -#### PRINT statement - -def p_command_print(p): - '''command : PRINT plist optend''' - p[0] = ('PRINT',p[2],p[3]) - -def p_command_print_bad(p): - '''command : PRINT error''' - p[0] = "MALFORMED PRINT STATEMENT" - -#### Optional ending on PRINT. Either a comma (,) or semicolon (;) - -def p_optend(p): - '''optend : COMMA - | SEMI - |''' - if len(p) == 2: - p[0] = p[1] - else: - p[0] = None - -#### PRINT statement with no arguments - -def p_command_print_empty(p): - '''command : PRINT''' - p[0] = ('PRINT',[],None) - -#### GOTO statement - -def p_command_goto(p): - '''command : GOTO INTEGER''' - p[0] = ('GOTO',int(p[2])) - -def p_command_goto_bad(p): - '''command : GOTO error''' - p[0] = "INVALID LINE NUMBER IN GOTO" - -#### IF-THEN statement - -def p_command_if(p): - '''command : IF relexpr THEN INTEGER''' - p[0] = ('IF',p[2],int(p[4])) - -def p_command_if_bad(p): - '''command : IF error THEN INTEGER''' - p[0] = "BAD RELATIONAL EXPRESSION" - -def p_command_if_bad2(p): - '''command : IF relexpr THEN error''' - p[0] = "INVALID LINE NUMBER IN THEN" - -#### FOR statement - -def p_command_for(p): - '''command : FOR ID EQUALS expr TO expr optstep''' - p[0] = ('FOR',p[2],p[4],p[6],p[7]) - -def p_command_for_bad_initial(p): - '''command : FOR ID EQUALS error TO expr optstep''' - p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_final(p): - '''command : FOR ID EQUALS expr TO error optstep''' - p[0] = "BAD FINAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_step(p): - '''command : FOR ID EQUALS expr TO expr STEP error''' - p[0] = "MALFORMED STEP IN FOR STATEMENT" - -#### Optional STEP qualifier on FOR statement - -def p_optstep(p): - '''optstep : STEP expr - | empty''' - if len(p) == 3: - p[0] = p[2] - else: - p[0] = None - -#### NEXT statement - -def p_command_next(p): - '''command : NEXT ID''' - - p[0] = ('NEXT',p[2]) - -def p_command_next_bad(p): - '''command : NEXT error''' - p[0] = "MALFORMED NEXT" - -#### END statement - -def p_command_end(p): - '''command : END''' - p[0] = ('END',) - -#### REM statement - -def p_command_rem(p): - '''command : REM''' - p[0] = ('REM',p[1]) - -#### STOP statement - -def p_command_stop(p): - '''command : STOP''' - p[0] = ('STOP',) - -#### DEF statement - -def p_command_def(p): - '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' - p[0] = ('FUNC',p[2],p[4],p[7]) - -def p_command_def_bad_rhs(p): 
- '''command : DEF ID LPAREN ID RPAREN EQUALS error''' - p[0] = "BAD EXPRESSION IN DEF STATEMENT" - -def p_command_def_bad_arg(p): - '''command : DEF ID LPAREN error RPAREN EQUALS expr''' - p[0] = "BAD ARGUMENT IN DEF STATEMENT" - -#### GOSUB statement - -def p_command_gosub(p): - '''command : GOSUB INTEGER''' - p[0] = ('GOSUB',int(p[2])) - -def p_command_gosub_bad(p): - '''command : GOSUB error''' - p[0] = "INVALID LINE NUMBER IN GOSUB" - -#### RETURN statement - -def p_command_return(p): - '''command : RETURN''' - p[0] = ('RETURN',) - -#### DIM statement - -def p_command_dim(p): - '''command : DIM dimlist''' - p[0] = ('DIM',p[2]) - -def p_command_dim_bad(p): - '''command : DIM error''' - p[0] = "MALFORMED VARIABLE LIST IN DIM" - -#### List of variables supplied to DIM statement - -def p_dimlist(p): - '''dimlist : dimlist COMMA dimitem - | dimitem''' - if len(p) == 4: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### DIM items - -def p_dimitem_single(p): - '''dimitem : ID LPAREN INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),0) - -def p_dimitem_double(p): - '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),eval(p[5])) - -#### Arithmetic expressions - -def p_expr_binary(p): - '''expr : expr PLUS expr - | expr MINUS expr - | expr TIMES expr - | expr DIVIDE expr - | expr POWER expr''' - - p[0] = ('BINOP',p[2],p[1],p[3]) - -def p_expr_number(p): - '''expr : INTEGER - | FLOAT''' - p[0] = ('NUM',eval(p[1])) - -def p_expr_variable(p): - '''expr : variable''' - p[0] = ('VAR',p[1]) - -def p_expr_group(p): - '''expr : LPAREN expr RPAREN''' - p[0] = ('GROUP',p[2]) - -def p_expr_unary(p): - '''expr : MINUS expr %prec UMINUS''' - p[0] = ('UNARY','-',p[2]) - -#### Relational expressions - -def p_relexpr(p): - '''relexpr : expr LT expr - | expr LE expr - | expr GT expr - | expr GE expr - | expr EQUALS expr - | expr NE expr''' - p[0] = ('RELOP',p[2],p[1],p[3]) - -#### Variables - -def p_variable(p): - '''variable : ID - | ID LPAREN expr RPAREN - | ID LPAREN expr COMMA expr RPAREN''' - if len(p) == 2: - p[0] = (p[1],None,None) - elif len(p) == 5: - p[0] = (p[1],p[3],None) - else: - p[0] = (p[1],p[3],p[5]) - -#### Builds a list of variable targets as a Python list - -def p_varlist(p): - '''varlist : varlist COMMA variable - | variable''' - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - - -#### Builds a list of numbers as a Python list - -def p_numlist(p): - '''numlist : numlist COMMA number - | number''' - - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### A number. May be an integer or a float - -def p_number(p): - '''number : INTEGER - | FLOAT''' - p[0] = eval(p[1]) - -#### A signed number. 
- -def p_number_signed(p): - '''number : MINUS INTEGER - | MINUS FLOAT''' - p[0] = eval("-"+p[2]) - -#### List of targets for a print statement -#### Returns a list of tuples (label,expr) - -def p_plist(p): - '''plist : plist COMMA pitem - | pitem''' - if len(p) > 3: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -def p_item_string(p): - '''pitem : STRING''' - p[0] = (p[1][1:-1],None) - -def p_item_string_expr(p): - '''pitem : STRING expr''' - p[0] = (p[1][1:-1],p[2]) - -def p_item_expr(p): - '''pitem : expr''' - p[0] = ("",p[1]) - -#### Empty - -def p_empty(p): - '''empty : ''' - -#### Catastrophic error handler -def p_error(p): - if not p: - print("SYNTAX ERROR AT EOF") - -bparser = yacc.yacc() - -def parse(data,debug=0): - bparser.error = 0 - p = bparser.parse(data,debug=debug) - if bparser.error: return None - return p - - - - - - - - - - - - - - diff --git a/ply/example/BASIC/dim.bas b/ply/example/BASIC/dim.bas deleted file mode 100644 index 87bd95b..0000000 --- a/ply/example/BASIC/dim.bas +++ /dev/null @@ -1,14 +0,0 @@ -5 DIM A(50,15) -10 FOR I = 1 TO 50 -20 FOR J = 1 TO 15 -30 LET A(I,J) = I + J -35 REM PRINT I,J, A(I,J) -40 NEXT J -50 NEXT I -100 FOR I = 1 TO 50 -110 FOR J = 1 TO 15 -120 PRINT A(I,J), -130 NEXT J -140 PRINT -150 NEXT I -999 END diff --git a/ply/example/BASIC/func.bas b/ply/example/BASIC/func.bas deleted file mode 100644 index 447ee16..0000000 --- a/ply/example/BASIC/func.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 DEF FDX(X) = 2*X -20 FOR I = 0 TO 100 -30 PRINT FDX(I) -40 NEXT I -50 END diff --git a/ply/example/BASIC/gcd.bas b/ply/example/BASIC/gcd.bas deleted file mode 100644 index d0b7746..0000000 --- a/ply/example/BASIC/gcd.bas +++ /dev/null @@ -1,22 +0,0 @@ -10 PRINT "A","B","C","GCD" -20 READ A,B,C -30 LET X = A -40 LET Y = B -50 GOSUB 200 -60 LET X = G -70 LET Y = C -80 GOSUB 200 -90 PRINT A, B, C, G -100 GOTO 20 -110 DATA 60, 90, 120 -120 DATA 38456, 64872, 98765 -130 DATA 32, 384, 72 -200 LET Q = INT(X/Y) -210 LET R = X - Q*Y -220 IF R = 0 THEN 300 -230 LET X = Y -240 LET Y = R -250 GOTO 200 -300 LET G = Y -310 RETURN -999 END diff --git a/ply/example/BASIC/gosub.bas b/ply/example/BASIC/gosub.bas deleted file mode 100644 index 99737b1..0000000 --- a/ply/example/BASIC/gosub.bas +++ /dev/null @@ -1,13 +0,0 @@ -100 LET X = 3 -110 GOSUB 400 -120 PRINT U, V, W -200 LET X = 5 -210 GOSUB 400 -220 LET Z = U + 2*V + 3*W -230 PRINT Z -240 GOTO 999 -400 LET U = X*X -410 LET V = X*X*X -420 LET W = X*X*X*X + X*X*X + X*X + X -430 RETURN -999 END diff --git a/ply/example/BASIC/hello.bas b/ply/example/BASIC/hello.bas deleted file mode 100644 index cc6f0b0..0000000 --- a/ply/example/BASIC/hello.bas +++ /dev/null @@ -1,4 +0,0 @@ -5 REM HELLO WORLD PROGAM -10 PRINT "HELLO WORLD" -99 END - diff --git a/ply/example/BASIC/linear.bas b/ply/example/BASIC/linear.bas deleted file mode 100644 index 56c0822..0000000 --- a/ply/example/BASIC/linear.bas +++ /dev/null @@ -1,17 +0,0 @@ -1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS -2 REM ::: A1*X1 + A2*X2 = B1 -3 REM ::: A3*X1 + A4*X2 = B2 -4 REM -------------------------------------- -10 READ A1, A2, A3, A4 -15 LET D = A1 * A4 - A3 * A2 -20 IF D = 0 THEN 65 -30 READ B1, B2 -37 LET X1 = (B1*A4 - B2*A2) / D -42 LET X2 = (A1*B2 - A3*B1) / D -55 PRINT X1, X2 -60 GOTO 30 -65 PRINT "NO UNIQUE SOLUTION" -70 DATA 1, 2, 4 -80 DATA 2, -7, 5 -85 DATA 1, 3, 4, -7 -90 END diff --git a/ply/example/BASIC/maxsin.bas b/ply/example/BASIC/maxsin.bas deleted file mode 100644 index b969015..0000000 --- a/ply/example/BASIC/maxsin.bas +++ /dev/null @@ 
-1,12 +0,0 @@ -5 PRINT "X VALUE", "SINE", "RESOLUTION" -10 READ D -20 LET M = -1 -30 FOR X = 0 TO 3 STEP D -40 IF SIN(X) <= M THEN 80 -50 LET X0 = X -60 LET M = SIN(X) -80 NEXT X -85 PRINT X0, M, D -90 GOTO 10 -100 DATA .1, .01, .001 -110 END diff --git a/ply/example/BASIC/powers.bas b/ply/example/BASIC/powers.bas deleted file mode 100644 index a454dc3..0000000 --- a/ply/example/BASIC/powers.bas +++ /dev/null @@ -1,13 +0,0 @@ -5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS" -6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS" -7 PRINT "N FROM 1 THROUGH 7" -8 PRINT -10 FOR N = 1 TO 7 -15 PRINT "N = "N -20 FOR I = 1 TO N -30 PRINT I^N, -40 NEXT I -50 PRINT -60 PRINT -70 NEXT N -80 END diff --git a/ply/example/BASIC/rand.bas b/ply/example/BASIC/rand.bas deleted file mode 100644 index 4ff7a14..0000000 --- a/ply/example/BASIC/rand.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR I = 1 TO 20 -20 PRINT INT(10*RND(0)) -30 NEXT I -40 END diff --git a/ply/example/BASIC/sales.bas b/ply/example/BASIC/sales.bas deleted file mode 100644 index a39aefb..0000000 --- a/ply/example/BASIC/sales.bas +++ /dev/null @@ -1,20 +0,0 @@ -10 FOR I = 1 TO 3 -20 READ P(I) -30 NEXT I -40 FOR I = 1 TO 3 -50 FOR J = 1 TO 5 -60 READ S(I,J) -70 NEXT J -80 NEXT I -90 FOR J = 1 TO 5 -100 LET S = 0 -110 FOR I = 1 TO 3 -120 LET S = S + P(I) * S(I,J) -130 NEXT I -140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S -150 NEXT J -200 DATA 1.25, 4.30, 2.50 -210 DATA 40, 20, 37, 29, 42 -220 DATA 10, 16, 3, 21, 8 -230 DATA 35, 47, 29, 16, 33 -300 END diff --git a/ply/example/BASIC/sears.bas b/ply/example/BASIC/sears.bas deleted file mode 100644 index 5ced397..0000000 --- a/ply/example/BASIC/sears.bas +++ /dev/null @@ -1,18 +0,0 @@ -1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD -2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE -3 REM :: SEARS TOWER. -4 REM :: S = HEIGHT OF TOWER (METERS) -5 REM :: T = THICKNESS OF PAPER (MILLIMETERS) -10 LET S = 442 -20 LET T = 0.1 -30 REM CONVERT T TO METERS -40 LET T = T * .001 -50 LET F = 1 -60 LET H = T -100 IF H > S THEN 200 -120 LET H = 2 * H -125 LET F = F + 1 -130 GOTO 100 -200 PRINT "NUMBER OF FOLDS ="F -220 PRINT "FINAL HEIGHT ="H -999 END diff --git a/ply/example/BASIC/sqrt1.bas b/ply/example/BASIC/sqrt1.bas deleted file mode 100644 index 6673a91..0000000 --- a/ply/example/BASIC/sqrt1.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 LET X = 0 -20 LET X = X + 1 -30 PRINT X, SQR(X) -40 IF X < 100 THEN 20 -50 END diff --git a/ply/example/BASIC/sqrt2.bas b/ply/example/BASIC/sqrt2.bas deleted file mode 100644 index 862d85e..0000000 --- a/ply/example/BASIC/sqrt2.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR X = 1 TO 100 -20 PRINT X, SQR(X) -30 NEXT X -40 END diff --git a/ply/example/GardenSnake/GardenSnake.py b/ply/example/GardenSnake/GardenSnake.py deleted file mode 100644 index 2a7f45e..0000000 --- a/ply/example/GardenSnake/GardenSnake.py +++ /dev/null @@ -1,709 +0,0 @@ -# GardenSnake - a parser generator demonstration program -# -# This implements a modified version of a subset of Python: -# - only 'def', 'return' and 'if' statements -# - 'if' only has 'then' clause (no elif nor else) -# - single-quoted strings only, content in raw format -# - numbers are decimal.Decimal instances (not integers or floats) -# - no print statment; use the built-in 'print' function -# - only < > == + - / * implemented (and unary + -) -# - assignment and tuple assignment work -# - no generators of any sort -# - no ... well, no quite a lot - -# Why? 
I'm thinking about a new indentation-based configuration -# language for a project and wanted to figure out how to do it. Once -# I got that working I needed a way to test it out. My original AST -# was dumb so I decided to target Python's AST and compile it into -# Python code. Plus, it's pretty cool that it only took a day or so -# from sitting down with Ply to having working code. - -# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ - -# This work is hereby released into the Public Domain. To view a copy of -# the public domain dedication, visit -# http://creativecommons.org/licenses/publicdomain/ or send a letter to -# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, -# California, 94105, USA. -# -# Portions of this work are derived from Python's Grammar definition -# and may be covered under the Python copyright and license -# -# Andrew Dalke / Dalke Scientific Software, LLC -# 30 August 2006 / Cape Town, South Africa - -# Changelog: -# 30 August - added link to CC license; removed the "swapcase" encoding - -# Modifications for inclusion in PLY distribution -import sys -sys.path.insert(0,"../..") -from ply import * - -##### Lexer ###### -#import lex -import decimal - -tokens = ( - 'DEF', - 'IF', - 'NAME', - 'NUMBER', # Python decimals - 'STRING', # single quoted strings only; syntax of raw strings - 'LPAR', - 'RPAR', - 'COLON', - 'EQ', - 'ASSIGN', - 'LT', - 'GT', - 'PLUS', - 'MINUS', - 'MULT', - 'DIV', - 'RETURN', - 'WS', - 'NEWLINE', - 'COMMA', - 'SEMICOLON', - 'INDENT', - 'DEDENT', - 'ENDMARKER', - ) - -#t_NUMBER = r'\d+' -# taken from decmial.py but without the leading sign -def t_NUMBER(t): - r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" - t.value = decimal.Decimal(t.value) - return t - -def t_STRING(t): - r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... - t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun - return t - -t_COLON = r':' -t_EQ = r'==' -t_ASSIGN = r'=' -t_LT = r'<' -t_GT = r'>' -t_PLUS = r'\+' -t_MINUS = r'-' -t_MULT = r'\*' -t_DIV = r'/' -t_COMMA = r',' -t_SEMICOLON = r';' - -# Ply nicely documented how to do this. - -RESERVED = { - "def": "DEF", - "if": "IF", - "return": "RETURN", - } - -def t_NAME(t): - r'[a-zA-Z_][a-zA-Z0-9_]*' - t.type = RESERVED.get(t.value, "NAME") - return t - -# Putting this before t_WS let it consume lines with only comments in -# them so the latter code never sees the WS part. Not consuming the -# newline. Needed for "if 1: #comment" -def t_comment(t): - r"[ ]*\043[^\n]*" # \043 is '#' - pass - - -# Whitespace -def t_WS(t): - r' [ ]+ ' - if t.lexer.at_line_start and t.lexer.paren_count == 0: - return t - -# Don't generate newline tokens when inside of parenthesis, eg -# a = (1, -# 2, 3) -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - t.type = "NEWLINE" - if t.lexer.paren_count == 0: - return t - -def t_LPAR(t): - r'\(' - t.lexer.paren_count += 1 - return t - -def t_RPAR(t): - r'\)' - # check for underflow? should be the job of the parser - t.lexer.paren_count -= 1 - return t - - -def t_error(t): - raise SyntaxError("Unknown symbol %r" % (t.value[0],)) - print "Skipping", repr(t.value[0]) - t.lexer.skip(1) - -## I implemented INDENT / DEDENT generation as a post-processing filter - -# The original lex token stream contains WS and NEWLINE characters. -# WS will only occur before any other tokens on a line. - -# I have three filters. One tags tokens by adding two attributes. -# "must_indent" is True if the token must be indented from the -# previous code. 
The other is "at_line_start" which is True for WS -# and the first non-WS/non-NEWLINE on a line. It flags the check so -# see if the new line has changed indication level. - -# Python's syntax has three INDENT states -# 0) no colon hence no need to indent -# 1) "if 1: go()" - simple statements have a COLON but no need for an indent -# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent -NO_INDENT = 0 -MAY_INDENT = 1 -MUST_INDENT = 2 - -# only care about whitespace at the start of a line -def track_tokens_filter(lexer, tokens): - lexer.at_line_start = at_line_start = True - indent = NO_INDENT - saw_colon = False - for token in tokens: - token.at_line_start = at_line_start - - if token.type == "COLON": - at_line_start = False - indent = MAY_INDENT - token.must_indent = False - - elif token.type == "NEWLINE": - at_line_start = True - if indent == MAY_INDENT: - indent = MUST_INDENT - token.must_indent = False - - elif token.type == "WS": - assert token.at_line_start == True - at_line_start = True - token.must_indent = False - - else: - # A real token; only indent after COLON NEWLINE - if indent == MUST_INDENT: - token.must_indent = True - else: - token.must_indent = False - at_line_start = False - indent = NO_INDENT - - yield token - lexer.at_line_start = at_line_start - -def _new_token(type, lineno): - tok = lex.LexToken() - tok.type = type - tok.value = None - tok.lineno = lineno - return tok - -# Synthesize a DEDENT tag -def DEDENT(lineno): - return _new_token("DEDENT", lineno) - -# Synthesize an INDENT tag -def INDENT(lineno): - return _new_token("INDENT", lineno) - - -# Track the indentation level and emit the right INDENT / DEDENT events. -def indentation_filter(tokens): - # A stack of indentation levels; will never pop item 0 - levels = [0] - token = None - depth = 0 - prev_was_ws = False - for token in tokens: -## if 1: -## print "Process", token, -## if token.at_line_start: -## print "at_line_start", -## if token.must_indent: -## print "must_indent", -## print - - # WS only occurs at the start of the line - # There may be WS followed by NEWLINE so - # only track the depth here. Don't indent/dedent - # until there's something real. 
- if token.type == "WS": - assert depth == 0 - depth = len(token.value) - prev_was_ws = True - # WS tokens are never passed to the parser - continue - - if token.type == "NEWLINE": - depth = 0 - if prev_was_ws or token.at_line_start: - # ignore blank lines - continue - # pass the other cases on through - yield token - continue - - # then it must be a real token (not WS, not NEWLINE) - # which can affect the indentation level - - prev_was_ws = False - if token.must_indent: - # The current depth must be larger than the previous level - if not (depth > levels[-1]): - raise IndentationError("expected an indented block") - - levels.append(depth) - yield INDENT(token.lineno) - - elif token.at_line_start: - # Must be on the same level or one of the previous levels - if depth == levels[-1]: - # At the same level - pass - elif depth > levels[-1]: - raise IndentationError("indentation increase but not in new block") - else: - # Back up; but only if it matches a previous level - try: - i = levels.index(depth) - except ValueError: - raise IndentationError("inconsistent indentation") - for _ in range(i+1, len(levels)): - yield DEDENT(token.lineno) - levels.pop() - - yield token - - ### Finished processing ### - - # Must dedent any remaining levels - if len(levels) > 1: - assert token is not None - for _ in range(1, len(levels)): - yield DEDENT(token.lineno) - - -# The top-level filter adds an ENDMARKER, if requested. -# Python's grammar uses it. -def filter(lexer, add_endmarker = True): - token = None - tokens = iter(lexer.token, None) - tokens = track_tokens_filter(lexer, tokens) - for token in indentation_filter(tokens): - yield token - - if add_endmarker: - lineno = 1 - if token is not None: - lineno = token.lineno - yield _new_token("ENDMARKER", lineno) - -# Combine Ply and my filters into a new lexer - -class IndentLexer(object): - def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): - self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags) - self.token_stream = None - def input(self, s, add_endmarker=True): - self.lexer.paren_count = 0 - self.lexer.input(s) - self.token_stream = filter(self.lexer, add_endmarker) - def token(self): - try: - return self.token_stream.next() - except StopIteration: - return None - -########## Parser (tokens -> AST) ###### - -# also part of Ply -#import yacc - -# I use the Python AST -from compiler import ast - -# Helper function -def Assign(left, right): - names = [] - if isinstance(left, ast.Name): - # Single assignment on left - return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) - elif isinstance(left, ast.Tuple): - # List of things - make sure they are Name nodes - names = [] - for child in left.getChildren(): - if not isinstance(child, ast.Name): - raise SyntaxError("that assignment not supported") - names.append(child.name) - ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] - return ast.Assign([ast.AssTuple(ass_list)], right) - else: - raise SyntaxError("Can't do that yet") - - -# The grammar comments come from Python's Grammar/Grammar file - -## NB: compound_stmt in single_input is followed by extra NEWLINE! 
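The filters above are the heart of GardenSnake's lexing stage: raw WS and NEWLINE tokens survive the lexer, and a generator then compares each logical line's leading-whitespace depth against a stack of open indentation levels, emitting synthetic INDENT and DEDENT tokens the way Python's own tokenizer does. A standalone sketch of that stack idea, using plain strings and tuples rather than PLY LexToken objects (all names here are illustrative, not part of the deleted module):

# Stack-based INDENT/DEDENT generation over raw source lines.
def indent_events(lines):
    levels = [0]                        # open indentation depths; level 0 never pops
    for text in lines:
        if not text.strip():
            continue                    # blank lines never change indentation
        depth = len(text) - len(text.lstrip(" "))
        if depth > levels[-1]:
            levels.append(depth)
            yield ("INDENT", None)
        else:
            while depth < levels[-1]:   # dedent back to an already-seen level
                levels.pop()
                yield ("DEDENT", None)
            if depth != levels[-1]:
                raise IndentationError("inconsistent dedent")
        yield ("STMT", text.strip())
    for _ in levels[1:]:                # close blocks still open at end of input
        yield ("DEDENT", None)

src = ["if x:", "    y = 1", "    if y:", "        z = 2", "done = 1"]
print(list(indent_events(src)))
# [('STMT', 'if x:'), ('INDENT', None), ('STMT', 'y = 1'), ('STMT', 'if y:'),
#  ('INDENT', None), ('STMT', 'z = 2'), ('DEDENT', None), ('DEDENT', None),
#  ('STMT', 'done = 1')]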
-# file_input: (NEWLINE | stmt)* ENDMARKER -def p_file_input_end(p): - """file_input_end : file_input ENDMARKER""" - p[0] = ast.Stmt(p[1]) -def p_file_input(p): - """file_input : file_input NEWLINE - | file_input stmt - | NEWLINE - | stmt""" - if isinstance(p[len(p)-1], basestring): - if len(p) == 3: - p[0] = p[1] - else: - p[0] = [] # p == 2 --> only a blank line - else: - if len(p) == 3: - p[0] = p[1] + p[2] - else: - p[0] = p[1] - - -# funcdef: [decorators] 'def' NAME parameters ':' suite -# ignoring decorators -def p_funcdef(p): - "funcdef : DEF NAME parameters COLON suite" - p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) - -# parameters: '(' [varargslist] ')' -def p_parameters(p): - """parameters : LPAR RPAR - | LPAR varargslist RPAR""" - if len(p) == 3: - p[0] = [] - else: - p[0] = p[2] - - -# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | -# highly simplified -def p_varargslist(p): - """varargslist : varargslist COMMA NAME - | NAME""" - if len(p) == 4: - p[0] = p[1] + p[3] - else: - p[0] = [p[1]] - -# stmt: simple_stmt | compound_stmt -def p_stmt_simple(p): - """stmt : simple_stmt""" - # simple_stmt is a list - p[0] = p[1] - -def p_stmt_compound(p): - """stmt : compound_stmt""" - p[0] = [p[1]] - -# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -def p_simple_stmt(p): - """simple_stmt : small_stmts NEWLINE - | small_stmts SEMICOLON NEWLINE""" - p[0] = p[1] - -def p_small_stmts(p): - """small_stmts : small_stmts SEMICOLON small_stmt - | small_stmt""" - if len(p) == 4: - p[0] = p[1] + [p[3]] - else: - p[0] = [p[1]] - -# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | -# import_stmt | global_stmt | exec_stmt | assert_stmt -def p_small_stmt(p): - """small_stmt : flow_stmt - | expr_stmt""" - p[0] = p[1] - -# expr_stmt: testlist (augassign (yield_expr|testlist) | -# ('=' (yield_expr|testlist))*) -# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | -# '<<=' | '>>=' | '**=' | '//=') -def p_expr_stmt(p): - """expr_stmt : testlist ASSIGN testlist - | testlist """ - if len(p) == 2: - # a list of expressions - p[0] = ast.Discard(p[1]) - else: - p[0] = Assign(p[1], p[3]) - -def p_flow_stmt(p): - "flow_stmt : return_stmt" - p[0] = p[1] - -# return_stmt: 'return' [testlist] -def p_return_stmt(p): - "return_stmt : RETURN testlist" - p[0] = ast.Return(p[2]) - - -def p_compound_stmt(p): - """compound_stmt : if_stmt - | funcdef""" - p[0] = p[1] - -def p_if_stmt(p): - 'if_stmt : IF test COLON suite' - p[0] = ast.If([(p[2], p[4])], None) - -def p_suite(p): - """suite : simple_stmt - | NEWLINE INDENT stmts DEDENT""" - if len(p) == 2: - p[0] = ast.Stmt(p[1]) - else: - p[0] = ast.Stmt(p[3]) - - -def p_stmts(p): - """stmts : stmts stmt - | stmt""" - if len(p) == 3: - p[0] = p[1] + p[2] - else: - p[0] = p[1] - -## No using Python's approach because Ply supports precedence - -# comparison: expr (comp_op expr)* -# arith_expr: term (('+'|'-') term)* -# term: factor (('*'|'/'|'%'|'//') factor)* -# factor: ('+'|'-'|'~') factor | power -# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' - -def make_lt_compare((left, right)): - return ast.Compare(left, [('<', right),]) -def make_gt_compare((left, right)): - return ast.Compare(left, [('>', right),]) -def make_eq_compare((left, right)): - return ast.Compare(left, [('==', right),]) - - -binary_ops = { - "+": ast.Add, - "-": ast.Sub, - "*": ast.Mul, - "/": ast.Div, - "<": make_lt_compare, - ">": make_gt_compare, - "==": make_eq_compare, -} 
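The compare helpers just above rely on Python 2 tuple-parameter syntax (def make_lt_compare((left, right))), which no longer parses under Python 3, and they build compiler.ast nodes, a module that is likewise Python 2 only. The table-driven idea itself is portable; a small sketch of the same dispatch pattern using explicit unpacking and plain tuples in place of AST nodes (illustrative only, not a drop-in replacement for the deleted code):

# Python 3-friendly version of the operator dispatch table idea above.
def make_compare(op):
    def build(pair):
        left, right = pair              # explicit unpacking instead of def f((l, r))
        return ("Compare", left, [(op, right)])
    return build

binary_builders = {
    "+": lambda pair: ("Add", pair),
    "-": lambda pair: ("Sub", pair),
    "*": lambda pair: ("Mul", pair),
    "/": lambda pair: ("Div", pair),
    "<": make_compare("<"),
    ">": make_compare(">"),
    "==": make_compare("=="),
}

# A rule like p_comparison below would look up p[2] here instead of writing
# one production function per operator.
print(binary_builders["<"]((("Name", "a"), ("Const", 2))))
# ('Compare', ('Name', 'a'), [('<', ('Const', 2))])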
-unary_ops = { - "+": ast.UnaryAdd, - "-": ast.UnarySub, - } -precedence = ( - ("left", "EQ", "GT", "LT"), - ("left", "PLUS", "MINUS"), - ("left", "MULT", "DIV"), - ) - -def p_comparison(p): - """comparison : comparison PLUS comparison - | comparison MINUS comparison - | comparison MULT comparison - | comparison DIV comparison - | comparison LT comparison - | comparison EQ comparison - | comparison GT comparison - | PLUS comparison - | MINUS comparison - | power""" - if len(p) == 4: - p[0] = binary_ops[p[2]]((p[1], p[3])) - elif len(p) == 3: - p[0] = unary_ops[p[1]](p[2]) - else: - p[0] = p[1] - -# power: atom trailer* ['**' factor] -# trailers enables function calls. I only allow one level of calls -# so this is 'trailer' -def p_power(p): - """power : atom - | atom trailer""" - if len(p) == 2: - p[0] = p[1] - else: - if p[2][0] == "CALL": - p[0] = ast.CallFunc(p[1], p[2][1], None, None) - else: - raise AssertionError("not implemented") - -def p_atom_name(p): - """atom : NAME""" - p[0] = ast.Name(p[1]) - -def p_atom_number(p): - """atom : NUMBER - | STRING""" - p[0] = ast.Const(p[1]) - -def p_atom_tuple(p): - """atom : LPAR testlist RPAR""" - p[0] = p[2] - -# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -def p_trailer(p): - "trailer : LPAR arglist RPAR" - p[0] = ("CALL", p[2]) - -# testlist: test (',' test)* [','] -# Contains shift/reduce error -def p_testlist(p): - """testlist : testlist_multi COMMA - | testlist_multi """ - if len(p) == 2: - p[0] = p[1] - else: - # May need to promote singleton to tuple - if isinstance(p[1], list): - p[0] = p[1] - else: - p[0] = [p[1]] - # Convert into a tuple? - if isinstance(p[0], list): - p[0] = ast.Tuple(p[0]) - -def p_testlist_multi(p): - """testlist_multi : testlist_multi COMMA test - | test""" - if len(p) == 2: - # singleton - p[0] = p[1] - else: - if isinstance(p[1], list): - p[0] = p[1] + [p[3]] - else: - # singleton -> tuple - p[0] = [p[1], p[3]] - - -# test: or_test ['if' or_test 'else' test] | lambdef -# as I don't support 'and', 'or', and 'not' this works down to 'comparison' -def p_test(p): - "test : comparison" - p[0] = p[1] - - - -# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) -# XXX INCOMPLETE: this doesn't allow the trailing comma -def p_arglist(p): - """arglist : arglist COMMA argument - | argument""" - if len(p) == 4: - p[0] = p[1] + [p[3]] - else: - p[0] = [p[1]] - -# argument: test [gen_for] | test '=' test # Really [keyword '='] test -def p_argument(p): - "argument : test" - p[0] = p[1] - -def p_error(p): - #print "Error!", repr(p) - raise SyntaxError(p) - - -class GardenSnakeParser(object): - def __init__(self, lexer = None): - if lexer is None: - lexer = IndentLexer() - self.lexer = lexer - self.parser = yacc.yacc(start="file_input_end") - - def parse(self, code): - self.lexer.input(code) - result = self.parser.parse(lexer = self.lexer) - return ast.Module(None, result) - - -###### Code generation ###### - -from compiler import misc, syntax, pycodegen - -class GardenSnakeCompiler(object): - def __init__(self): - self.parser = GardenSnakeParser() - def compile(self, code, filename=""): - tree = self.parser.parse(code) - #print tree - misc.set_filename(filename, tree) - syntax.check(tree) - gen = pycodegen.ModuleCodeGenerator(tree) - code = gen.getCode() - return code - -####### Test code ####### - -compile = GardenSnakeCompiler().compile - -code = r""" - -print('LET\'S TRY THIS \\OUT') - -#Comment here -def x(a): - print('called with',a) - if a == 1: - return 2 - if a*2 > 10: 
return 999 / 4 - # Another comment here - - return a+2*3 - -ints = (1, 2, - 3, 4, -5) -print('mutiline-expression', ints) - -t = 4+1/3*2+6*(9-5+1) -print('predence test; should be 34+2/3:', t, t==(34+2/3)) - -print('numbers', 1,2,3,4,5) -if 1: - 8 - a=9 - print(x(a)) - -print(x(1)) -print(x(2)) -print(x(8),'3') -print('this is decimal', 1/5) -print('BIG DECIMAL', 1.234567891234567e12345) - -""" - -# Set up the GardenSnake run-time environment -def print_(*args): - print "-->", " ".join(map(str,args)) - -globals()["print"] = print_ - -compiled_code = compile(code) - -exec compiled_code in globals() -print "Done" diff --git a/ply/example/GardenSnake/README b/ply/example/GardenSnake/README deleted file mode 100644 index 4d8be2d..0000000 --- a/ply/example/GardenSnake/README +++ /dev/null @@ -1,5 +0,0 @@ -This example is Andrew Dalke's GardenSnake language. It shows how to process an -indentation-like language like Python. Further details can be found here: - -http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html - diff --git a/ply/example/README b/ply/example/README deleted file mode 100644 index 63519b5..0000000 --- a/ply/example/README +++ /dev/null @@ -1,10 +0,0 @@ -Simple examples: - calc - Simple calculator - classcalc - Simple calculate defined as a class - -Complex examples - ansic - ANSI C grammar from K&R - BASIC - A small BASIC interpreter - GardenSnake - A simple python-like language - yply - Converts Unix yacc files to PLY programs. - diff --git a/ply/example/ansic/README b/ply/example/ansic/README deleted file mode 100644 index e049d3b..0000000 --- a/ply/example/ansic/README +++ /dev/null @@ -1,2 +0,0 @@ -This example is incomplete. Was going to specify an ANSI C parser. -This is part of it. diff --git a/ply/example/ansic/clex.py b/ply/example/ansic/clex.py deleted file mode 100644 index 37fdd8e..0000000 --- a/ply/example/ansic/clex.py +++ /dev/null @@ -1,164 +0,0 @@ -# ---------------------------------------------------------------------- -# clex.py -# -# A lexer for ANSI C. -# ---------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -import ply.lex as lex - -# Reserved words -reserved = ( - 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', - 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', - 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', - 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', - ) - -tokens = reserved + ( - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', - - # Structure dereference (->) - 'ARROW', - - # Conditional operator (?) - 'CONDOP', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) 
- 'ELLIPSIS', - ) - -# Completely ignored characters -t_ignore = ' \t\x0c' - -# Newlines -def t_NEWLINE(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MOD = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' - -# Increment/decrement -t_PLUSPLUS = r'\+\+' -t_MINUSMINUS = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_CONDOP = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers and reserved words - -reserved_map = { } -for r in reserved: - reserved_map[r.lower()] = r - -def t_ID(t): - r'[A-Za-z_][\w_]*' - t.type = reserved_map.get(t.value,"ID") - return t - -# Integer literal -t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_SCONST = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comments -def t_comment(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - -# Preprocessor directive (ignored) -def t_preprocessor(t): - r'\#(.)*?\n' - t.lexer.lineno += 1 - -def t_error(t): - print("Illegal character %s" % repr(t.value[0])) - t.lexer.skip(1) - -lexer = lex.lex(optimize=1) -if __name__ == "__main__": - lex.runmain(lexer) - - - - - diff --git a/ply/example/ansic/cparse.py b/ply/example/ansic/cparse.py deleted file mode 100644 index c9b9164..0000000 --- a/ply/example/ansic/cparse.py +++ /dev/null @@ -1,863 +0,0 @@ -# ----------------------------------------------------------------------------- -# cparse.py -# -# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. 
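The t_ID rule above shows the standard PLY idiom for keywords: rather than writing one regex per reserved word (which can match the prefix of an identifier such as "iffy"), the lexer matches every identifier and then reclassifies it through reserved_map. A cut-down, self-contained sketch of that idiom for a made-up three-keyword language (token names here are illustrative, not the ANSI C set):

import ply.lex as lex

reserved = {"if": "IF", "while": "WHILE", "return": "RETURN"}
tokens = ["ID", "NUMBER"] + list(reserved.values())

def t_ID(t):
    r'[A-Za-z_][A-Za-z0-9_]*'
    t.type = reserved.get(t.value, "ID")   # reclassify keywords after matching
    return t

t_NUMBER = r'\d+'
t_ignore = " \t"

def t_error(t):
    print("Illegal character %r" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("while x return 42")
print([(tok.type, tok.value) for tok in lexer])
# [('WHILE', 'while'), ('ID', 'x'), ('RETURN', 'return'), ('NUMBER', '42')]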
-# ----------------------------------------------------------------------------- - -import sys -import clex -import ply.yacc as yacc - -# Get the token map -tokens = clex.tokens - -# translation-unit: - -def p_translation_unit_1(t): - 'translation_unit : external_declaration' - pass - -def p_translation_unit_2(t): - 'translation_unit : translation_unit external_declaration' - pass - -# external-declaration: - -def p_external_declaration_1(t): - 'external_declaration : function_definition' - pass - -def p_external_declaration_2(t): - 'external_declaration : declaration' - pass - -# function-definition: - -def p_function_definition_1(t): - 'function_definition : declaration_specifiers declarator declaration_list compound_statement' - pass - -def p_function_definition_2(t): - 'function_definition : declarator declaration_list compound_statement' - pass - -def p_function_definition_3(t): - 'function_definition : declarator compound_statement' - pass - -def p_function_definition_4(t): - 'function_definition : declaration_specifiers declarator compound_statement' - pass - -# declaration: - -def p_declaration_1(t): - 'declaration : declaration_specifiers init_declarator_list SEMI' - pass - -def p_declaration_2(t): - 'declaration : declaration_specifiers SEMI' - pass - -# declaration-list: - -def p_declaration_list_1(t): - 'declaration_list : declaration' - pass - -def p_declaration_list_2(t): - 'declaration_list : declaration_list declaration ' - pass - -# declaration-specifiers -def p_declaration_specifiers_1(t): - 'declaration_specifiers : storage_class_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_2(t): - 'declaration_specifiers : type_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_3(t): - 'declaration_specifiers : type_qualifier declaration_specifiers' - pass - -def p_declaration_specifiers_4(t): - 'declaration_specifiers : storage_class_specifier' - pass - -def p_declaration_specifiers_5(t): - 'declaration_specifiers : type_specifier' - pass - -def p_declaration_specifiers_6(t): - 'declaration_specifiers : type_qualifier' - pass - -# storage-class-specifier -def p_storage_class_specifier(t): - '''storage_class_specifier : AUTO - | REGISTER - | STATIC - | EXTERN - | TYPEDEF - ''' - pass - -# type-specifier: -def p_type_specifier(t): - '''type_specifier : VOID - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | SIGNED - | UNSIGNED - | struct_or_union_specifier - | enum_specifier - | TYPEID - ''' - pass - -# type-qualifier: -def p_type_qualifier(t): - '''type_qualifier : CONST - | VOLATILE''' - pass - -# struct-or-union-specifier - -def p_struct_or_union_specifier_1(t): - 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_2(t): - 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_3(t): - 'struct_or_union_specifier : struct_or_union ID' - pass - -# struct-or-union: -def p_struct_or_union(t): - '''struct_or_union : STRUCT - | UNION - ''' - pass - -# struct-declaration-list: - -def p_struct_declaration_list_1(t): - 'struct_declaration_list : struct_declaration' - pass - -def p_struct_declaration_list_2(t): - 'struct_declaration_list : struct_declaration_list struct_declaration' - pass - -# init-declarator-list: - -def p_init_declarator_list_1(t): - 'init_declarator_list : init_declarator' - pass - -def p_init_declarator_list_2(t): - 'init_declarator_list : init_declarator_list COMMA 
init_declarator' - pass - -# init-declarator - -def p_init_declarator_1(t): - 'init_declarator : declarator' - pass - -def p_init_declarator_2(t): - 'init_declarator : declarator EQUALS initializer' - pass - -# struct-declaration: - -def p_struct_declaration(t): - 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' - pass - -# specifier-qualifier-list: - -def p_specifier_qualifier_list_1(t): - 'specifier_qualifier_list : type_specifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_2(t): - 'specifier_qualifier_list : type_specifier' - pass - -def p_specifier_qualifier_list_3(t): - 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_4(t): - 'specifier_qualifier_list : type_qualifier' - pass - -# struct-declarator-list: - -def p_struct_declarator_list_1(t): - 'struct_declarator_list : struct_declarator' - pass - -def p_struct_declarator_list_2(t): - 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' - pass - -# struct-declarator: - -def p_struct_declarator_1(t): - 'struct_declarator : declarator' - pass - -def p_struct_declarator_2(t): - 'struct_declarator : declarator COLON constant_expression' - pass - -def p_struct_declarator_3(t): - 'struct_declarator : COLON constant_expression' - pass - -# enum-specifier: - -def p_enum_specifier_1(t): - 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_2(t): - 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_3(t): - 'enum_specifier : ENUM ID' - pass - -# enumerator_list: -def p_enumerator_list_1(t): - 'enumerator_list : enumerator' - pass - -def p_enumerator_list_2(t): - 'enumerator_list : enumerator_list COMMA enumerator' - pass - -# enumerator: -def p_enumerator_1(t): - 'enumerator : ID' - pass - -def p_enumerator_2(t): - 'enumerator : ID EQUALS constant_expression' - pass - -# declarator: - -def p_declarator_1(t): - 'declarator : pointer direct_declarator' - pass - -def p_declarator_2(t): - 'declarator : direct_declarator' - pass - -# direct-declarator: - -def p_direct_declarator_1(t): - 'direct_declarator : ID' - pass - -def p_direct_declarator_2(t): - 'direct_declarator : LPAREN declarator RPAREN' - pass - -def p_direct_declarator_3(t): - 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_declarator_4(t): - 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' - pass - -def p_direct_declarator_5(t): - 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' - pass - -def p_direct_declarator_6(t): - 'direct_declarator : direct_declarator LPAREN RPAREN ' - pass - -# pointer: -def p_pointer_1(t): - 'pointer : TIMES type_qualifier_list' - pass - -def p_pointer_2(t): - 'pointer : TIMES' - pass - -def p_pointer_3(t): - 'pointer : TIMES type_qualifier_list pointer' - pass - -def p_pointer_4(t): - 'pointer : TIMES pointer' - pass - -# type-qualifier-list: - -def p_type_qualifier_list_1(t): - 'type_qualifier_list : type_qualifier' - pass - -def p_type_qualifier_list_2(t): - 'type_qualifier_list : type_qualifier_list type_qualifier' - pass - -# parameter-type-list: - -def p_parameter_type_list_1(t): - 'parameter_type_list : parameter_list' - pass - -def p_parameter_type_list_2(t): - 'parameter_type_list : parameter_list COMMA ELLIPSIS' - pass - -# parameter-list: - -def p_parameter_list_1(t): - 'parameter_list : parameter_declaration' - pass - -def p_parameter_list_2(t): - 
'parameter_list : parameter_list COMMA parameter_declaration' - pass - -# parameter-declaration: -def p_parameter_declaration_1(t): - 'parameter_declaration : declaration_specifiers declarator' - pass - -def p_parameter_declaration_2(t): - 'parameter_declaration : declaration_specifiers abstract_declarator_opt' - pass - -# identifier-list: -def p_identifier_list_1(t): - 'identifier_list : ID' - pass - -def p_identifier_list_2(t): - 'identifier_list : identifier_list COMMA ID' - pass - -# initializer: - -def p_initializer_1(t): - 'initializer : assignment_expression' - pass - -def p_initializer_2(t): - '''initializer : LBRACE initializer_list RBRACE - | LBRACE initializer_list COMMA RBRACE''' - pass - -# initializer-list: - -def p_initializer_list_1(t): - 'initializer_list : initializer' - pass - -def p_initializer_list_2(t): - 'initializer_list : initializer_list COMMA initializer' - pass - -# type-name: - -def p_type_name(t): - 'type_name : specifier_qualifier_list abstract_declarator_opt' - pass - -def p_abstract_declarator_opt_1(t): - 'abstract_declarator_opt : empty' - pass - -def p_abstract_declarator_opt_2(t): - 'abstract_declarator_opt : abstract_declarator' - pass - -# abstract-declarator: - -def p_abstract_declarator_1(t): - 'abstract_declarator : pointer ' - pass - -def p_abstract_declarator_2(t): - 'abstract_declarator : pointer direct_abstract_declarator' - pass - -def p_abstract_declarator_3(t): - 'abstract_declarator : direct_abstract_declarator' - pass - -# direct-abstract-declarator: - -def p_direct_abstract_declarator_1(t): - 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' - pass - -def p_direct_abstract_declarator_2(t): - 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_3(t): - 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_4(t): - 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' - pass - -def p_direct_abstract_declarator_5(t): - 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' - pass - -# Optional fields in abstract declarators - -def p_constant_expression_opt_1(t): - 'constant_expression_opt : empty' - pass - -def p_constant_expression_opt_2(t): - 'constant_expression_opt : constant_expression' - pass - -def p_parameter_type_list_opt_1(t): - 'parameter_type_list_opt : empty' - pass - -def p_parameter_type_list_opt_2(t): - 'parameter_type_list_opt : parameter_type_list' - pass - -# statement: - -def p_statement(t): - ''' - statement : labeled_statement - | expression_statement - | compound_statement - | selection_statement - | iteration_statement - | jump_statement - ''' - pass - -# labeled-statement: - -def p_labeled_statement_1(t): - 'labeled_statement : ID COLON statement' - pass - -def p_labeled_statement_2(t): - 'labeled_statement : CASE constant_expression COLON statement' - pass - -def p_labeled_statement_3(t): - 'labeled_statement : DEFAULT COLON statement' - pass - -# expression-statement: -def p_expression_statement(t): - 'expression_statement : expression_opt SEMI' - pass - -# compound-statement: - -def p_compound_statement_1(t): - 'compound_statement : LBRACE declaration_list statement_list RBRACE' - pass - -def p_compound_statement_2(t): - 'compound_statement : LBRACE statement_list RBRACE' - pass - -def p_compound_statement_3(t): - 'compound_statement : LBRACE declaration_list RBRACE' - pass - -def 
p_compound_statement_4(t): - 'compound_statement : LBRACE RBRACE' - pass - -# statement-list: - -def p_statement_list_1(t): - 'statement_list : statement' - pass - -def p_statement_list_2(t): - 'statement_list : statement_list statement' - pass - -# selection-statement - -def p_selection_statement_1(t): - 'selection_statement : IF LPAREN expression RPAREN statement' - pass - -def p_selection_statement_2(t): - 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' - pass - -def p_selection_statement_3(t): - 'selection_statement : SWITCH LPAREN expression RPAREN statement ' - pass - -# iteration_statement: - -def p_iteration_statement_1(t): - 'iteration_statement : WHILE LPAREN expression RPAREN statement' - pass - -def p_iteration_statement_2(t): - 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' - pass - -def p_iteration_statement_3(t): - 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' - pass - -# jump_statement: - -def p_jump_statement_1(t): - 'jump_statement : GOTO ID SEMI' - pass - -def p_jump_statement_2(t): - 'jump_statement : CONTINUE SEMI' - pass - -def p_jump_statement_3(t): - 'jump_statement : BREAK SEMI' - pass - -def p_jump_statement_4(t): - 'jump_statement : RETURN expression_opt SEMI' - pass - -def p_expression_opt_1(t): - 'expression_opt : empty' - pass - -def p_expression_opt_2(t): - 'expression_opt : expression' - pass - -# expression: -def p_expression_1(t): - 'expression : assignment_expression' - pass - -def p_expression_2(t): - 'expression : expression COMMA assignment_expression' - pass - -# assigment_expression: -def p_assignment_expression_1(t): - 'assignment_expression : conditional_expression' - pass - -def p_assignment_expression_2(t): - 'assignment_expression : unary_expression assignment_operator assignment_expression' - pass - -# assignment_operator: -def p_assignment_operator(t): - ''' - assignment_operator : EQUALS - | TIMESEQUAL - | DIVEQUAL - | MODEQUAL - | PLUSEQUAL - | MINUSEQUAL - | LSHIFTEQUAL - | RSHIFTEQUAL - | ANDEQUAL - | OREQUAL - | XOREQUAL - ''' - pass - -# conditional-expression -def p_conditional_expression_1(t): - 'conditional_expression : logical_or_expression' - pass - -def p_conditional_expression_2(t): - 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' - pass - -# constant-expression - -def p_constant_expression(t): - 'constant_expression : conditional_expression' - pass - -# logical-or-expression - -def p_logical_or_expression_1(t): - 'logical_or_expression : logical_and_expression' - pass - -def p_logical_or_expression_2(t): - 'logical_or_expression : logical_or_expression LOR logical_and_expression' - pass - -# logical-and-expression - -def p_logical_and_expression_1(t): - 'logical_and_expression : inclusive_or_expression' - pass - -def p_logical_and_expression_2(t): - 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' - pass - -# inclusive-or-expression: - -def p_inclusive_or_expression_1(t): - 'inclusive_or_expression : exclusive_or_expression' - pass - -def p_inclusive_or_expression_2(t): - 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' - pass - -# exclusive-or-expression: - -def p_exclusive_or_expression_1(t): - 'exclusive_or_expression : and_expression' - pass - -def p_exclusive_or_expression_2(t): - 'exclusive_or_expression : exclusive_or_expression XOR and_expression' - pass - -# AND-expression - -def 
p_and_expression_1(t): - 'and_expression : equality_expression' - pass - -def p_and_expression_2(t): - 'and_expression : and_expression AND equality_expression' - pass - - -# equality-expression: -def p_equality_expression_1(t): - 'equality_expression : relational_expression' - pass - -def p_equality_expression_2(t): - 'equality_expression : equality_expression EQ relational_expression' - pass - -def p_equality_expression_3(t): - 'equality_expression : equality_expression NE relational_expression' - pass - - -# relational-expression: -def p_relational_expression_1(t): - 'relational_expression : shift_expression' - pass - -def p_relational_expression_2(t): - 'relational_expression : relational_expression LT shift_expression' - pass - -def p_relational_expression_3(t): - 'relational_expression : relational_expression GT shift_expression' - pass - -def p_relational_expression_4(t): - 'relational_expression : relational_expression LE shift_expression' - pass - -def p_relational_expression_5(t): - 'relational_expression : relational_expression GE shift_expression' - pass - -# shift-expression - -def p_shift_expression_1(t): - 'shift_expression : additive_expression' - pass - -def p_shift_expression_2(t): - 'shift_expression : shift_expression LSHIFT additive_expression' - pass - -def p_shift_expression_3(t): - 'shift_expression : shift_expression RSHIFT additive_expression' - pass - -# additive-expression - -def p_additive_expression_1(t): - 'additive_expression : multiplicative_expression' - pass - -def p_additive_expression_2(t): - 'additive_expression : additive_expression PLUS multiplicative_expression' - pass - -def p_additive_expression_3(t): - 'additive_expression : additive_expression MINUS multiplicative_expression' - pass - -# multiplicative-expression - -def p_multiplicative_expression_1(t): - 'multiplicative_expression : cast_expression' - pass - -def p_multiplicative_expression_2(t): - 'multiplicative_expression : multiplicative_expression TIMES cast_expression' - pass - -def p_multiplicative_expression_3(t): - 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' - pass - -def p_multiplicative_expression_4(t): - 'multiplicative_expression : multiplicative_expression MOD cast_expression' - pass - -# cast-expression: - -def p_cast_expression_1(t): - 'cast_expression : unary_expression' - pass - -def p_cast_expression_2(t): - 'cast_expression : LPAREN type_name RPAREN cast_expression' - pass - -# unary-expression: -def p_unary_expression_1(t): - 'unary_expression : postfix_expression' - pass - -def p_unary_expression_2(t): - 'unary_expression : PLUSPLUS unary_expression' - pass - -def p_unary_expression_3(t): - 'unary_expression : MINUSMINUS unary_expression' - pass - -def p_unary_expression_4(t): - 'unary_expression : unary_operator cast_expression' - pass - -def p_unary_expression_5(t): - 'unary_expression : SIZEOF unary_expression' - pass - -def p_unary_expression_6(t): - 'unary_expression : SIZEOF LPAREN type_name RPAREN' - pass - -#unary-operator -def p_unary_operator(t): - '''unary_operator : AND - | TIMES - | PLUS - | MINUS - | NOT - | LNOT ''' - pass - -# postfix-expression: -def p_postfix_expression_1(t): - 'postfix_expression : primary_expression' - pass - -def p_postfix_expression_2(t): - 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' - pass - -def p_postfix_expression_3(t): - 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' - pass - -def p_postfix_expression_4(t): - 'postfix_expression : 
postfix_expression LPAREN RPAREN' - pass - -def p_postfix_expression_5(t): - 'postfix_expression : postfix_expression PERIOD ID' - pass - -def p_postfix_expression_6(t): - 'postfix_expression : postfix_expression ARROW ID' - pass - -def p_postfix_expression_7(t): - 'postfix_expression : postfix_expression PLUSPLUS' - pass - -def p_postfix_expression_8(t): - 'postfix_expression : postfix_expression MINUSMINUS' - pass - -# primary-expression: -def p_primary_expression(t): - '''primary_expression : ID - | constant - | SCONST - | LPAREN expression RPAREN''' - pass - -# argument-expression-list: -def p_argument_expression_list(t): - '''argument_expression_list : assignment_expression - | argument_expression_list COMMA assignment_expression''' - pass - -# constant: -def p_constant(t): - '''constant : ICONST - | FCONST - | CCONST''' - pass - - -def p_empty(t): - 'empty : ' - pass - -def p_error(t): - print("Whoa. We're hosed") - -import profile -# Build the grammar - -yacc.yacc(method='LALR') - -#profile.run("yacc.yacc(method='LALR')") - - - - diff --git a/ply/example/calc/calc.py b/ply/example/calc/calc.py deleted file mode 100644 index b923780..0000000 --- a/ply/example/calc/calc.py +++ /dev/null @@ -1,107 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - ) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME "=" expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - -def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - -def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - -def p_expression_name(p): - "expression : NAME" - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) diff --git a/ply/example/calcdebug/calc.py b/ply/example/calcdebug/calc.py deleted file mode 100644 index 6732f9f..0000000 --- a/ply/example/calcdebug/calc.py +++ /dev/null @@ -1,113 +0,0 @@ -# ----------------------------------------------------------------------------- 
-# calc.py -# -# This example shows how to run the parser in a debugging mode -# with output routed to a logging object. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - ) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME "=" expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - -def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - -def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - -def p_expression_name(p): - "expression : NAME" - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc() - -import logging -logging.basicConfig( - level=logging.INFO, - filename="parselog.txt" -) - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s,debug=logging.getLogger()) diff --git a/ply/example/classcalc/calc.py b/ply/example/classcalc/calc.py deleted file mode 100755 index bf0d065..0000000 --- a/ply/example/classcalc/calc.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. 
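The calcdebug variant above reduces to one pattern worth calling out: build the parser as usual, then hand a logging object to parse() through its debug argument so the shift/reduce trace lands in a log file instead of stderr. A minimal sketch, assuming the calculator's token and grammar rules are already defined at module level (the file name and input string are illustrative):

    import logging
    import ply.yacc as yacc

    logging.basicConfig(level=logging.INFO, filename="parselog.txt")

    yacc.yacc()                                              # build the parser from the rules in this module
    yacc.parse("x = 1 + 2 * 3", debug=logging.getLogger())   # trace is written to parselog.txt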
-# -# Class-based example contributed to PLY by David McNab -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser: - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print(p[1]) - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(self, p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/ply/example/classcalc/calc_Calc_parsetab.py b/ply/example/classcalc/calc_Calc_parsetab.py deleted file mode 100644 index 6ec0d30..0000000 --- a/ply/example/classcalc/calc_Calc_parsetab.py +++ /dev/null @@ -1,40 +0,0 @@ - -# calc_Calc_parsetab.py -# This file is automatically generated. Do not edit. 
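The generated calc_Calc_parsetab.py module whose contents follow is the parser-table cache that the classcalc example above writes through yacc.yacc(tabmodule=...). It is safe to delete; PLY rebuilds it whenever the grammar signature no longer matches. The relevant knobs, as a hedged sketch (assumes the grammar rules are defined in the calling module; the names mirror the example above):

    import ply.yacc as yacc

    # Re-use or regenerate a named table module, as the classcalc example does:
    parser = yacc.yacc(tabmodule="calc_Calc_parsetab", debugfile="calc_Calc.dbg")

    # Or skip the on-disk cache entirely (slower start-up, no generated file):
    parser = yacc.yacc(write_tables=0)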
-_tabversion = '3.2' - -_lr_method = 'LALR' - -_lr_signature = '|\x0f"\xe2\x0e\xf7\x0fT\x15K\x1c\xc0\x1e\xa3c\x10' - -_lr_action_items = {'$end':([1,2,3,5,9,15,16,17,18,19,20,21,22,],[-11,-10,0,-2,-11,-8,-1,-9,-6,-5,-3,-7,-4,]),'RPAREN':([2,8,9,15,17,18,19,20,21,22,],[-10,17,-11,-8,-9,-6,-5,-3,-7,-4,]),'DIVIDE':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,10,10,-11,-8,10,-9,-6,-5,10,-7,10,]),'EQUALS':([1,],[7,]),'NUMBER':([0,4,6,7,10,11,12,13,14,],[2,2,2,2,2,2,2,2,2,]),'PLUS':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,12,12,-11,-8,12,-9,-6,-5,-3,-7,-4,]),'LPAREN':([0,4,6,7,10,11,12,13,14,],[4,4,4,4,4,4,4,4,4,]),'EXP':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,13,13,-11,-8,13,-9,13,13,13,-7,13,]),'TIMES':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,11,11,-11,-8,11,-9,-6,-5,11,-7,11,]),'MINUS':([0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,],[6,-11,-10,6,14,6,6,14,-11,6,6,6,6,6,-8,14,-9,-6,-5,-3,-7,-4,]),'NAME':([0,4,6,7,10,11,12,13,14,],[1,9,9,9,9,9,9,9,9,]),} - -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'expression':([0,4,6,7,10,11,12,13,14,],[5,8,15,16,18,19,20,21,22,]),'statement':([0,],[3,]),} - -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> statement","S'",1,None,None,None), - ('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','./calc.py',107), - ('statement -> expression','statement',1,'p_statement_expr','./calc.py',111), - ('expression -> expression PLUS expression','expression',3,'p_expression_binop','./calc.py',116), - ('expression -> expression MINUS expression','expression',3,'p_expression_binop','./calc.py',117), - ('expression -> expression TIMES expression','expression',3,'p_expression_binop','./calc.py',118), - ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','./calc.py',119), - ('expression -> expression EXP expression','expression',3,'p_expression_binop','./calc.py',120), - ('expression -> MINUS expression','expression',2,'p_expression_uminus','./calc.py',130), - ('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','./calc.py',134), - ('expression -> NUMBER','expression',1,'p_expression_number','./calc.py',138), - ('expression -> NAME','expression',1,'p_expression_name','./calc.py',142), -] diff --git a/ply/example/cleanup.sh b/ply/example/cleanup.sh deleted file mode 100755 index 3e115f4..0000000 --- a/ply/example/cleanup.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class diff --git a/ply/example/closurecalc/calc.py b/ply/example/closurecalc/calc.py deleted file mode 100644 index 6598f58..0000000 --- a/ply/example/closurecalc/calc.py +++ /dev/null @@ -1,130 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A calculator parser that makes use of closures. The function make_calculator() -# returns a function that accepts an input string and returns a result. All -# lexing rules, parsing rules, and internal state are held inside the function. 
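One consequence of the closure-based design described above: each call to make_calculator() (defined below) returns an independent evaluator with its own variable store. A usage sketch with made-up inputs:

    calc_a = make_calculator()
    calc_b = make_calculator()
    calc_a("x = 10")
    calc_b("x = 99")
    print(calc_a("x + 1"))   # 11 -- calc_b's assignment does not leak into calc_a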
-# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -# Make a calculator function - -def make_calculator(): - import ply.lex as lex - import ply.yacc as yacc - - # ------- Internal calculator state - - variables = { } # Dictionary of stored variables - - # ------- Calculator tokenizing rules - - tokens = ( - 'NAME','NUMBER', - ) - - literals = ['=','+','-','*','/', '(',')'] - - t_ignore = " \t" - - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - - def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Build the lexer - lexer = lex.lex() - - # ------- Calculator parsing rules - - precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - - def p_statement_assign(p): - 'statement : NAME "=" expression' - variables[p[1]] = p[3] - p[0] = None - - def p_statement_expr(p): - 'statement : expression' - p[0] = p[1] - - def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - - def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - - def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - - def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - - def p_expression_name(p): - "expression : NAME" - try: - p[0] = variables[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - - - # Build the parser - parser = yacc.yacc() - - # ------- Input function - - def input(text): - result = parser.parse(text,lexer=lexer) - return result - - return input - -# Make a calculator object and use it -calc = make_calculator() - -while True: - try: - s = raw_input("calc > ") - except EOFError: - break - r = calc(s) - if r: - print(r) - - diff --git a/ply/example/hedit/hedit.py b/ply/example/hedit/hedit.py deleted file mode 100644 index 2e80675..0000000 --- a/ply/example/hedit/hedit.py +++ /dev/null @@ -1,48 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters. 
-# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() -lex.runmain() - - diff --git a/ply/example/newclasscalc/calc.py b/ply/example/newclasscalc/calc.py deleted file mode 100755 index a12e498..0000000 --- a/ply/example/newclasscalc/calc.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# Class-based example contributed to PLY by David McNab. -# -# Modified to use new-style classes. Test case. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser(object): - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print(p[1]) - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in 
range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(self, p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/ply/example/optcalc/README b/ply/example/optcalc/README deleted file mode 100644 index 53dd5fc..0000000 --- a/ply/example/optcalc/README +++ /dev/null @@ -1,9 +0,0 @@ -An example showing how to use Python optimized mode. -To run: - - - First run 'python calc.py' - - - Then run 'python -OO calc.py' - -If working correctly, the second version should run the -same way. diff --git a/ply/example/optcalc/calc.py b/ply/example/optcalc/calc.py deleted file mode 100644 index dd83351..0000000 --- a/ply/example/optcalc/calc.py +++ /dev/null @@ -1,119 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex(optimize=1) - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - elif t[2] == '<': t[0] = t[1] < t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - if t: - print("Syntax error at '%s'" % 
t.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc(optimize=1) - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - yacc.parse(s) - diff --git a/ply/example/unicalc/calc.py b/ply/example/unicalc/calc.py deleted file mode 100644 index 55fb48d..0000000 --- a/ply/example/unicalc/calc.py +++ /dev/null @@ -1,117 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# This example uses unicode strings for tokens, docstrings, and input. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = ur'\+' -t_MINUS = ur'-' -t_TIMES = ur'\*' -t_DIVIDE = ur'/' -t_EQUALS = ur'=' -t_LPAREN = ur'\(' -t_RPAREN = ur'\)' -t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - ur'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = u" \t" - -def t_newline(t): - ur'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print p[1] - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == u'+' : p[0] = p[1] + p[3] - elif p[2] == u'-': p[0] = p[1] - p[3] - elif p[2] == u'*': p[0] = p[1] * p[3] - elif p[2] == u'/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - -def p_error(p): - if p: - print "Syntax error at '%s'" % p.value - else: - print "Syntax error at EOF" - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(unicode(s)) diff --git a/ply/example/yply/README b/ply/example/yply/README deleted file mode 100644 index bfadf36..0000000 --- a/ply/example/yply/README +++ /dev/null @@ -1,41 +0,0 @@ -yply.py - -This example implements a program yply.py that converts a UNIX-yacc -specification file into a PLY-compatible program. To use, simply -run it like this: - - % python yply.py [-nocode] inputfile.y >myparser.py - -The output of this program is Python code. In the output, -any C code in the original file is included, but is commented out. -If you use the -nocode option, then all of the C code in the -original file is just discarded. - -To use the resulting grammer with PLY, you'll need to edit the -myparser.py file. Within this file, some stub code is included that -can be used to test the construction of the parsing tables. 
However, -you'll need to do more editing to make a workable parser. - -Disclaimer: This just an example I threw together in an afternoon. -It might have some bugs. However, it worked when I tried it on -a yacc-specified C++ parser containing 442 rules and 855 parsing -states. - -Comments: - -1. This example does not parse specification files meant for lex/flex. - You'll need to specify the tokenizer on your own. - -2. This example shows a number of interesting PLY features including - - - Parsing of literal text delimited by nested parentheses - - Some interaction between the parser and the lexer. - - Use of literals in the grammar specification - - One pass compilation. The program just emits the result, - there is no intermediate parse tree. - -3. This program could probably be cleaned up and enhanced a lot. - It would be great if someone wanted to work on this (hint). - --Dave - diff --git a/ply/example/yply/ylex.py b/ply/example/yply/ylex.py deleted file mode 100644 index 84f2f7a..0000000 --- a/ply/example/yply/ylex.py +++ /dev/null @@ -1,112 +0,0 @@ -# lexer for yacc-grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import sys -sys.path.append("../..") - -from ply import * - -tokens = ( - 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE', - 'ID','QLITERAL','NUMBER', -) - -states = (('code','exclusive'),) - -literals = [ ';', ',', '<', '>', '|',':' ] -t_ignore = ' \t' - -t_TOKEN = r'%token' -t_LEFT = r'%left' -t_RIGHT = r'%right' -t_NONASSOC = r'%nonassoc' -t_PREC = r'%prec' -t_START = r'%start' -t_TYPE = r'%type' -t_UNION = r'%union' -t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' -t_QLITERAL = r'''(?P['"]).*?(?P=quote)''' -t_NUMBER = r'\d+' - -def t_SECTION(t): - r'%%' - if getattr(t.lexer,"lastsection",0): - t.value = t.lexer.lexdata[t.lexpos+2:] - t.lexer.lexpos = len(t.lexer.lexdata) - else: - t.lexer.lastsection = 0 - return t - -# Comments -def t_ccomment(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - -t_ignore_cppcomment = r'//.*' - -def t_LITERAL(t): - r'%\{(.|\n)*?%\}' - t.lexer.lineno += t.value.count("\n") - return t - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - -def t_code(t): - r'\{' - t.lexer.codestart = t.lexpos - t.lexer.level = 1 - t.lexer.begin('code') - -def t_code_ignore_string(t): - r'\"([^\\\n]|(\\.))*?\"' - -def t_code_ignore_char(t): - r'\'([^\\\n]|(\\.))*?\'' - -def t_code_ignore_comment(t): - r'/\*(.|\n)*?\*/' - -def t_code_ignore_cppcom(t): - r'//.*' - -def t_code_lbrace(t): - r'\{' - t.lexer.level += 1 - -def t_code_rbrace(t): - r'\}' - t.lexer.level -= 1 - if t.lexer.level == 0: - t.type = 'CODE' - t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] - t.lexer.begin('INITIAL') - t.lexer.lineno += t.value.count('\n') - return t - -t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' -t_code_ignore_whitespace = r'\s+' -t_code_ignore = "" - -def t_code_error(t): - raise RuntimeError - -def t_error(t): - print "%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]) - print t.value - t.lexer.skip(1) - -lex.lex() - -if __name__ == '__main__': - lex.runmain() - - - - - - - diff --git a/ply/example/yply/yparse.py b/ply/example/yply/yparse.py deleted file mode 100644 index ab5b884..0000000 --- a/ply/example/yply/yparse.py +++ /dev/null @@ -1,217 +0,0 @@ -# parser for Unix yacc-based grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import ylex -tokens = ylex.tokens - -from ply import * - -tokenlist = [] -preclist = [] - -emit_code = 1 - -def 
p_yacc(p): - '''yacc : defsection rulesection''' - -def p_defsection(p): - '''defsection : definitions SECTION - | SECTION''' - p.lexer.lastsection = 1 - print "tokens = ", repr(tokenlist) - print - print "precedence = ", repr(preclist) - print - print "# -------------- RULES ----------------" - print - -def p_rulesection(p): - '''rulesection : rules SECTION''' - - print "# -------------- RULES END ----------------" - print_code(p[2],0) - -def p_definitions(p): - '''definitions : definitions definition - | definition''' - -def p_definition_literal(p): - '''definition : LITERAL''' - print_code(p[1],0) - -def p_definition_start(p): - '''definition : START ID''' - print "start = '%s'" % p[2] - -def p_definition_token(p): - '''definition : toktype opttype idlist optsemi ''' - for i in p[3]: - if i[0] not in "'\"": - tokenlist.append(i) - if p[1] == '%left': - preclist.append(('left',) + tuple(p[3])) - elif p[1] == '%right': - preclist.append(('right',) + tuple(p[3])) - elif p[1] == '%nonassoc': - preclist.append(('nonassoc',)+ tuple(p[3])) - -def p_toktype(p): - '''toktype : TOKEN - | LEFT - | RIGHT - | NONASSOC''' - p[0] = p[1] - -def p_opttype(p): - '''opttype : '<' ID '>' - | empty''' - -def p_idlist(p): - '''idlist : idlist optcomma tokenid - | tokenid''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - p[1].append(p[3]) - -def p_tokenid(p): - '''tokenid : ID - | ID NUMBER - | QLITERAL - | QLITERAL NUMBER''' - p[0] = p[1] - -def p_optsemi(p): - '''optsemi : ';' - | empty''' - -def p_optcomma(p): - '''optcomma : ',' - | empty''' - -def p_definition_type(p): - '''definition : TYPE '<' ID '>' namelist optsemi''' - # type declarations are ignored - -def p_namelist(p): - '''namelist : namelist optcomma ID - | ID''' - -def p_definition_union(p): - '''definition : UNION CODE optsemi''' - # Union declarations are ignored - -def p_rules(p): - '''rules : rules rule - | rule''' - if len(p) == 2: - rule = p[1] - else: - rule = p[2] - - # Print out a Python equivalent of this rule - - embedded = [ ] # Embedded actions (a mess) - embed_count = 0 - - rulename = rule[0] - rulecount = 1 - for r in rule[1]: - # r contains one of the rule possibilities - print "def p_%s_%d(p):" % (rulename,rulecount) - prod = [] - prodcode = "" - for i in range(len(r)): - item = r[i] - if item[0] == '{': # A code block - if i == len(r) - 1: - prodcode = item - break - else: - # an embedded action - embed_name = "_embed%d_%s" % (embed_count,rulename) - prod.append(embed_name) - embedded.append((embed_name,item)) - embed_count += 1 - else: - prod.append(item) - print " '''%s : %s'''" % (rulename, " ".join(prod)) - # Emit code - print_code(prodcode,4) - print - rulecount += 1 - - for e,code in embedded: - print "def p_%s(p):" % e - print " '''%s : '''" % e - print_code(code,4) - print - -def p_rule(p): - '''rule : ID ':' rulelist ';' ''' - p[0] = (p[1],[p[3]]) - -def p_rule2(p): - '''rule : ID ':' rulelist morerules ';' ''' - p[4].insert(0,p[3]) - p[0] = (p[1],p[4]) - -def p_rule_empty(p): - '''rule : ID ':' ';' ''' - p[0] = (p[1],[[]]) - -def p_rule_empty2(p): - '''rule : ID ':' morerules ';' ''' - - p[3].insert(0,[]) - p[0] = (p[1],p[3]) - -def p_morerules(p): - '''morerules : morerules '|' rulelist - | '|' rulelist - | '|' ''' - - if len(p) == 2: - p[0] = [[]] - elif len(p) == 3: - p[0] = [p[2]] - else: - p[0] = p[1] - p[0].append(p[3]) - -# print "morerules", len(p), p[0] - -def p_rulelist(p): - '''rulelist : rulelist ruleitem - | ruleitem''' - - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - 
p[1].append(p[2]) - -def p_ruleitem(p): - '''ruleitem : ID - | QLITERAL - | CODE - | PREC''' - p[0] = p[1] - -def p_empty(p): - '''empty : ''' - -def p_error(p): - pass - -yacc.yacc(debug=0) - -def print_code(code,indent): - if not emit_code: return - codelines = code.splitlines() - for c in codelines: - print "%s# %s" % (" "*indent,c) - diff --git a/ply/example/yply/yply.py b/ply/example/yply/yply.py deleted file mode 100755 index a439817..0000000 --- a/ply/example/yply/yply.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/local/bin/python -# yply.py -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 -# -# Converts a UNIX-yacc specification file into a PLY-compatible -# specification. To use, simply do this: -# -# % python yply.py [-nocode] inputfile.y >myparser.py -# -# The output of this program is Python code. In the output, -# any C code in the original file is included, but is commented. -# If you use the -nocode option, then all of the C code in the -# original file is discarded. -# -# Disclaimer: This just an example I threw together in an afternoon. -# It might have some bugs. However, it worked when I tried it on -# a yacc-specified C++ parser containing 442 rules and 855 parsing -# states. -# - -import sys -sys.path.insert(0,"../..") - -import ylex -import yparse - -from ply import * - -if len(sys.argv) == 1: - print "usage : yply.py [-nocode] inputfile" - raise SystemExit - -if len(sys.argv) == 3: - if sys.argv[1] == '-nocode': - yparse.emit_code = 0 - else: - print "Unknown option '%s'" % sys.argv[1] - raise SystemExit - filename = sys.argv[2] -else: - filename = sys.argv[1] - -yacc.parse(open(filename).read()) - -print """ -if __name__ == '__main__': - from ply import * - yacc.yacc() -""" - - diff --git a/ply/ply/__init__.py b/ply/ply/__init__.py deleted file mode 100644 index 853a985..0000000 --- a/ply/ply/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__all__ = ['lex','yacc'] diff --git a/ply/ply/cpp.py b/ply/ply/cpp.py deleted file mode 100644 index 5cad682..0000000 --- a/ply/ply/cpp.py +++ /dev/null @@ -1,898 +0,0 @@ -# ----------------------------------------------------------------------------- -# cpp.py -# -# Author: David Beazley (http://www.dabeaz.com) -# Copyright (C) 2007 -# All rights reserved -# -# This module implements an ANSI-C style lexical preprocessor for PLY. -# ----------------------------------------------------------------------------- -from __future__ import generators - -# ----------------------------------------------------------------------------- -# Default preprocessor lexer definitions. These tokens are enough to get -# a basic preprocessor working. Other modules may import these if they want -# ----------------------------------------------------------------------------- - -tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND' -) - -literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" - -# Whitespace -def t_CPP_WS(t): - r'\s+' - t.lexer.lineno += t.value.count("\n") - return t - -t_CPP_POUND = r'\#' -t_CPP_DPOUND = r'\#\#' - -# Identifier -t_CPP_ID = r'[A-Za-z_][\w_]*' - -# Integer literal -def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)' - return t - -t_CPP_INTEGER = CPP_INTEGER - -# Floating literal -t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 
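A quick way to see what these default preprocessor tokens produce is to build a lexer directly from this module and dump the tokens for one line of C. A sketch, assuming the file is importable as ply.cpp (the input line is illustrative):

    import ply.lex as lex
    import ply.cpp as cpp

    lexer = lex.lex(module=cpp)          # build a lexer from the t_CPP_* rules in this file
    lexer.input('#define SQUARE(x) ((x)*(x))\n')
    for tok in iter(lexer.token, None):  # token() returns None at end of input
        print("%s %r" % (tok.type, tok.value))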
- -# String literal -def t_CPP_STRING(t): - r'\"([^\\\n]|(\\(.|\n)))*?\"' - t.lexer.lineno += t.value.count("\n") - return t - -# Character constant 'c' or L'c' -def t_CPP_CHAR(t): - r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' - t.lexer.lineno += t.value.count("\n") - return t - -# Comment -def t_CPP_COMMENT(t): - r'(/\*(.|\n)*?\*/)|(//.*?\n)' - t.lexer.lineno += t.value.count("\n") - return t - -def t_error(t): - t.type = t.value[0] - t.value = t.value[0] - t.lexer.skip(1) - return t - -import re -import copy -import time -import os.path - -# ----------------------------------------------------------------------------- -# trigraph() -# -# Given an input string, this function replaces all trigraph sequences. -# The following mapping is used: -# -# ??= # -# ??/ \ -# ??' ^ -# ??( [ -# ??) ] -# ??! | -# ??< { -# ??> } -# ??- ~ -# ----------------------------------------------------------------------------- - -_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') -_trigraph_rep = { - '=':'#', - '/':'\\', - "'":'^', - '(':'[', - ')':']', - '!':'|', - '<':'{', - '>':'}', - '-':'~' -} - -def trigraph(input): - return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) - -# ------------------------------------------------------------------ -# Macro object -# -# This object holds information about preprocessor macros -# -# .name - Macro name (string) -# .value - Macro value (a list of tokens) -# .arglist - List of argument names -# .variadic - Boolean indicating whether or not variadic macro -# .vararg - Name of the variadic parameter -# -# When a macro is created, the macro replacement token sequence is -# pre-scanned and used to create patch lists that are later used -# during macro expansion -# ------------------------------------------------------------------ - -class Macro(object): - def __init__(self,name,value,arglist=None,variadic=False): - self.name = name - self.value = value - self.arglist = arglist - self.variadic = variadic - if variadic: - self.vararg = arglist[-1] - self.source = None - -# ------------------------------------------------------------------ -# Preprocessor object -# -# Object representing a preprocessor. Contains macro definitions, -# include directories, and other information -# ------------------------------------------------------------------ - -class Preprocessor(object): - def __init__(self,lexer=None): - if lexer is None: - lexer = lex.lexer - self.lexer = lexer - self.macros = { } - self.path = [] - self.temp_path = [] - - # Probe the lexer for selected tokens - self.lexprobe() - - tm = time.localtime() - self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) - self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) - self.parser = None - - # ----------------------------------------------------------------------------- - # tokenize() - # - # Utility function. 
Given a string of text, tokenize into a list of tokens - # ----------------------------------------------------------------------------- - - def tokenize(self,text): - tokens = [] - self.lexer.input(text) - while True: - tok = self.lexer.token() - if not tok: break - tokens.append(tok) - return tokens - - # --------------------------------------------------------------------- - # error() - # - # Report a preprocessor error/warning of some kind - # ---------------------------------------------------------------------- - - def error(self,file,line,msg): - print("%s:%d %s" % (file,line,msg)) - - # ---------------------------------------------------------------------- - # lexprobe() - # - # This method probes the preprocessor lexer object to discover - # the token types of symbols that are important to the preprocessor. - # If this works right, the preprocessor will simply "work" - # with any suitable lexer regardless of how tokens have been named. - # ---------------------------------------------------------------------- - - def lexprobe(self): - - # Determine the token type for identifiers - self.lexer.input("identifier") - tok = self.lexer.token() - if not tok or tok.value != "identifier": - print("Couldn't determine identifier type") - else: - self.t_ID = tok.type - - # Determine the token type for integers - self.lexer.input("12345") - tok = self.lexer.token() - if not tok or int(tok.value) != 12345: - print("Couldn't determine integer type") - else: - self.t_INTEGER = tok.type - self.t_INTEGER_TYPE = type(tok.value) - - # Determine the token type for strings enclosed in double quotes - self.lexer.input("\"filename\"") - tok = self.lexer.token() - if not tok or tok.value != "\"filename\"": - print("Couldn't determine string type") - else: - self.t_STRING = tok.type - - # Determine the token type for whitespace--if any - self.lexer.input(" ") - tok = self.lexer.token() - if not tok or tok.value != " ": - self.t_SPACE = None - else: - self.t_SPACE = tok.type - - # Determine the token type for newlines - self.lexer.input("\n") - tok = self.lexer.token() - if not tok or tok.value != "\n": - self.t_NEWLINE = None - print("Couldn't determine token for newlines") - else: - self.t_NEWLINE = tok.type - - self.t_WS = (self.t_SPACE, self.t_NEWLINE) - - # Check for other characters used by the preprocessor - chars = [ '<','>','#','##','\\','(',')',',','.'] - for c in chars: - self.lexer.input(c) - tok = self.lexer.token() - if not tok or tok.value != c: - print("Unable to lex '%s' required for preprocessor" % c) - - # ---------------------------------------------------------------------- - # add_path() - # - # Adds a search path to the preprocessor. - # ---------------------------------------------------------------------- - - def add_path(self,path): - self.path.append(path) - - # ---------------------------------------------------------------------- - # group_lines() - # - # Given an input string, this function splits it into lines. Trailing whitespace - # is removed. Any line ending with \ is grouped with the next line. This - # function forms the lowest level of the preprocessor---grouping into text into - # a line-by-line format. 
- # ---------------------------------------------------------------------- - - def group_lines(self,input): - lex = self.lexer.clone() - lines = [x.rstrip() for x in input.splitlines()] - for i in xrange(len(lines)): - j = i+1 - while lines[i].endswith('\\') and (j < len(lines)): - lines[i] = lines[i][:-1]+lines[j] - lines[j] = "" - j += 1 - - input = "\n".join(lines) - lex.input(input) - lex.lineno = 1 - - current_line = [] - while True: - tok = lex.token() - if not tok: - break - current_line.append(tok) - if tok.type in self.t_WS and '\n' in tok.value: - yield current_line - current_line = [] - - if current_line: - yield current_line - - # ---------------------------------------------------------------------- - # tokenstrip() - # - # Remove leading/trailing whitespace tokens from a token list - # ---------------------------------------------------------------------- - - def tokenstrip(self,tokens): - i = 0 - while i < len(tokens) and tokens[i].type in self.t_WS: - i += 1 - del tokens[:i] - i = len(tokens)-1 - while i >= 0 and tokens[i].type in self.t_WS: - i -= 1 - del tokens[i+1:] - return tokens - - - # ---------------------------------------------------------------------- - # collect_args() - # - # Collects comma separated arguments from a list of tokens. The arguments - # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) - # where tokencount is the number of tokens consumed, args is a list of arguments, - # and positions is a list of integers containing the starting index of each - # argument. Each argument is represented by a list of tokens. - # - # When collecting arguments, leading and trailing whitespace is removed - # from each argument. - # - # This function properly handles nested parenthesis and commas---these do not - # define new arguments. - # ---------------------------------------------------------------------- - - def collect_args(self,tokenlist): - args = [] - positions = [] - current_arg = [] - nesting = 1 - tokenlen = len(tokenlist) - - # Search for the opening '('. 
- i = 0 - while (i < tokenlen) and (tokenlist[i].type in self.t_WS): - i += 1 - - if (i < tokenlen) and (tokenlist[i].value == '('): - positions.append(i+1) - else: - self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") - return 0, [], [] - - i += 1 - - while i < tokenlen: - t = tokenlist[i] - if t.value == '(': - current_arg.append(t) - nesting += 1 - elif t.value == ')': - nesting -= 1 - if nesting == 0: - if current_arg: - args.append(self.tokenstrip(current_arg)) - positions.append(i) - return i+1,args,positions - current_arg.append(t) - elif t.value == ',' and nesting == 1: - args.append(self.tokenstrip(current_arg)) - positions.append(i+1) - current_arg = [] - else: - current_arg.append(t) - i += 1 - - # Missing end argument - self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") - return 0, [],[] - - # ---------------------------------------------------------------------- - # macro_prescan() - # - # Examine the macro value (token sequence) and identify patch points - # This is used to speed up macro expansion later on---we'll know - # right away where to apply patches to the value to form the expansion - # ---------------------------------------------------------------------- - - def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments - macro.str_patch = [] # String conversion expansion - macro.var_comma_patch = [] # Variadic macro comma patch - i = 0 - while i < len(macro.value): - if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: - argnum = macro.arglist.index(macro.value[i].value) - # Conversion of argument to a string - if i > 0 and macro.value[i-1].value == '#': - macro.value[i] = copy.copy(macro.value[i]) - macro.value[i].type = self.t_STRING - del macro.value[i-1] - macro.str_patch.append((argnum,i-1)) - continue - # Concatenation - elif (i > 0 and macro.value[i-1].value == '##'): - macro.patch.append(('c',argnum,i-1)) - del macro.value[i-1] - continue - elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): - macro.patch.append(('c',argnum,i)) - i += 1 - continue - # Standard expansion - else: - macro.patch.append(('e',argnum,i)) - elif macro.value[i].value == '##': - if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ - ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ - (macro.value[i+1].value == macro.vararg): - macro.var_comma_patch.append(i-1) - i += 1 - macro.patch.sort(key=lambda x: x[2],reverse=True) - - # ---------------------------------------------------------------------- - # macro_expand_args() - # - # Given a Macro and list of arguments (each a token list), this method - # returns an expanded version of a macro. The return value is a token sequence - # representing the replacement macro tokens - # ---------------------------------------------------------------------- - - def macro_expand_args(self,macro,args): - # Make a copy of the macro token sequence - rep = [copy.copy(_x) for _x in macro.value] - - # Make string expansion patches. These do not alter the length of the replacement sequence - - str_expansion = {} - for argnum, i in macro.str_patch: - if argnum not in str_expansion: - str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") - rep[i] = copy.copy(rep[i]) - rep[i].value = str_expansion[argnum] - - # Make the variadic macro comma patch. 
If the variadic macro argument is empty, we get rid - comma_patch = False - if macro.variadic and not args[-1]: - for i in macro.var_comma_patch: - rep[i] = None - comma_patch = True - - # Make all other patches. The order of these matters. It is assumed that the patch list - # has been sorted in reverse order of patch location since replacements will cause the - # size of the replacement sequence to expand from the patch point. - - expanded = { } - for ptype, argnum, i in macro.patch: - # Concatenation. Argument is left unexpanded - if ptype == 'c': - rep[i:i+1] = args[argnum] - # Normal expansion. Argument is macro expanded first - elif ptype == 'e': - if argnum not in expanded: - expanded[argnum] = self.expand_macros(args[argnum]) - rep[i:i+1] = expanded[argnum] - - # Get rid of removed comma if necessary - if comma_patch: - rep = [_i for _i in rep if _i] - - return rep - - - # ---------------------------------------------------------------------- - # expand_macros() - # - # Given a list of tokens, this function performs macro expansion. - # The expanded argument is a dictionary that contains macros already - # expanded. This is used to prevent infinite recursion. - # ---------------------------------------------------------------------- - - def expand_macros(self,tokens,expanded=None): - if expanded is None: - expanded = {} - i = 0 - while i < len(tokens): - t = tokens[i] - if t.type == self.t_ID: - if t.value in self.macros and t.value not in expanded: - # Yes, we found a macro match - expanded[t.value] = True - - m = self.macros[t.value] - if not m.arglist: - # A simple macro - ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) - for e in ex: - e.lineno = t.lineno - tokens[i:i+1] = ex - i += len(ex) - else: - # A macro with arguments - j = i + 1 - while j < len(tokens) and tokens[j].type in self.t_WS: - j += 1 - if tokens[j].value == '(': - tokcount,args,positions = self.collect_args(tokens[j:]) - if not m.variadic and len(args) != len(m.arglist): - self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) - i = j + tokcount - elif m.variadic and len(args) < len(m.arglist)-1: - if len(m.arglist) > 2: - self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) - else: - self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) - i = j + tokcount - else: - if m.variadic: - if len(args) == len(m.arglist)-1: - args.append([]) - else: - args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] - del args[len(m.arglist):] - - # Get macro replacement text - rep = self.macro_expand_args(m,args) - rep = self.expand_macros(rep,expanded) - for r in rep: - r.lineno = t.lineno - tokens[i:j+tokcount] = rep - i += len(rep) - del expanded[t.value] - continue - elif t.value == '__LINE__': - t.type = self.t_INTEGER - t.value = self.t_INTEGER_TYPE(t.lineno) - - i += 1 - return tokens - - # ---------------------------------------------------------------------- - # evalexpr() - # - # Evaluate an expression token sequence for the purposes of evaluating - # integral expressions. 
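As a concrete illustration of what this evaluation amounts to: defined(...) tests are rewritten to 1 or 0, any identifier left over after macro expansion becomes 0, the C operators &&, || and ! are mapped to and, or and not, and the resulting string is handed to eval(). A sketch, assuming the module is importable as ply.cpp and run under Python 2, which this file targets (the macro names are made up):

    import ply.lex as lex
    import ply.cpp as cpp

    lexer = lex.lex(module=cpp)
    pp = cpp.Preprocessor(lexer)
    pp.define("FOO 1")
    cond = pp.tokenize("defined(FOO) && !defined(BAR) && VALUE + 1 > 2")
    print(pp.evalexpr(cond))   # False: VALUE is undefined, so it evaluates as 0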
- # ---------------------------------------------------------------------- - - def evalexpr(self,tokens): - # tokens = tokenize(line) - # Search for defined macros - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - result = "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.error(self.source,tokens[i].lineno,"Malformed defined()") - j += 1 - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - tokens = self.expand_macros(tokens) - for i,t in enumerate(tokens): - if t.type == self.t_ID: - tokens[i] = copy.copy(t) - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE("0L") - elif t.type == self.t_INTEGER: - tokens[i] = copy.copy(t) - # Strip off any trailing suffixes - tokens[i].value = str(tokens[i].value) - while tokens[i].value[-1] not in "0123456789abcdefABCDEF": - tokens[i].value = tokens[i].value[:-1] - - expr = "".join([str(x.value) for x in tokens]) - expr = expr.replace("&&"," and ") - expr = expr.replace("||"," or ") - expr = expr.replace("!"," not ") - try: - result = eval(expr) - except StandardError: - self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") - result = 0 - return result - - # ---------------------------------------------------------------------- - # parsegen() - # - # Parse an input string/ - # ---------------------------------------------------------------------- - def parsegen(self,input,source=None): - - # Replace trigraph sequences - t = trigraph(input) - lines = self.group_lines(t) - - if not source: - source = "" - - self.define("__FILE__ \"%s\"" % source) - - self.source = source - chunk = [] - enable = True - iftrigger = False - ifstack = [] - - for x in lines: - for i,tok in enumerate(x): - if tok.type not in self.t_WS: break - if tok.value == '#': - # Preprocessor directive - - for tok in x: - if tok in self.t_WS and '\n' in tok.value: - chunk.append(tok) - - dirtokens = self.tokenstrip(x[i+1:]) - if dirtokens: - name = dirtokens[0].value - args = self.tokenstrip(dirtokens[1:]) - else: - name = "" - args = [] - - if name == 'define': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.define(args) - elif name == 'include': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - oldfile = self.macros['__FILE__'] - for tok in self.include(args): - yield tok - self.macros['__FILE__'] = oldfile - self.source = source - elif name == 'undef': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.undef(args) - elif name == 'ifdef': - ifstack.append((enable,iftrigger)) - if enable: - if not args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'ifndef': - ifstack.append((enable,iftrigger)) - if enable: - if args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'if': - ifstack.append((enable,iftrigger)) - if enable: - result = self.evalexpr(args) - if not result: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'elif': - if ifstack: - if ifstack[-1][0]: # We only pay attention if outer "if" 
allows this - if enable: # If already true, we flip enable False - enable = False - elif not iftrigger: # If False, but not triggered yet, we'll check expression - result = self.evalexpr(args) - if result: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - - elif name == 'else': - if ifstack: - if ifstack[-1][0]: - if enable: - enable = False - elif not iftrigger: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #else") - - elif name == 'endif': - if ifstack: - enable,iftrigger = ifstack.pop() - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") - else: - # Unknown preprocessor directive - pass - - else: - # Normal text - if enable: - chunk.extend(x) - - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - - # ---------------------------------------------------------------------- - # include() - # - # Implementation of file-inclusion - # ---------------------------------------------------------------------- - - def include(self,tokens): - # Try to extract the filename and then process an include file - if not tokens: - return - if tokens: - if tokens[0].value != '<' and tokens[0].type != self.t_STRING: - tokens = self.expand_macros(tokens) - - if tokens[0].value == '<': - # Include <...> - i = 1 - while i < len(tokens): - if tokens[i].value == '>': - break - i += 1 - else: - print("Malformed #include <...>") - return - filename = "".join([x.value for x in tokens[1:i]]) - path = self.path + [""] + self.temp_path - elif tokens[0].type == self.t_STRING: - filename = tokens[0].value[1:-1] - path = self.temp_path + [""] + self.path - else: - print("Malformed #include statement") - return - for p in path: - iname = os.path.join(p,filename) - try: - data = open(iname,"r").read() - dname = os.path.dirname(iname) - if dname: - self.temp_path.insert(0,dname) - for tok in self.parsegen(data,filename): - yield tok - if dname: - del self.temp_path[0] - break - except IOError: - pass - else: - print("Couldn't find '%s'" % filename) - - # ---------------------------------------------------------------------- - # define() - # - # Define a new macro - # ---------------------------------------------------------------------- - - def define(self,tokens): - if isinstance(tokens,(str,unicode)): - tokens = self.tokenize(tokens) - - linetok = tokens - try: - name = linetok[0] - if len(linetok) > 1: - mtype = linetok[1] - else: - mtype = None - if not mtype: - m = Macro(name.value,[]) - self.macros[name.value] = m - elif mtype.type in self.t_WS: - # A normal macro - m = Macro(name.value,self.tokenstrip(linetok[2:])) - self.macros[name.value] = m - elif mtype.value == '(': - # A macro with arguments - tokcount, args, positions = self.collect_args(linetok[1:]) - variadic = False - for a in args: - if variadic: - print("No more arguments may follow a variadic argument") - break - astr = "".join([str(_i.value) for _i in a]) - if astr == "...": - variadic = True - a[0].type = self.t_ID - a[0].value = '__VA_ARGS__' - variadic = True - del a[1:] - continue - elif astr[-3:] == "..." and a[0].type == self.t_ID: - variadic = True - del a[1:] - # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes - # of macro expansion - if a[0].value[-3:] == '...': - a[0].value = a[0].value[:-3] - continue - if len(a) > 1 or a[0].type != self.t_ID: - print("Invalid macro argument") - break - else: - mvalue = self.tokenstrip(linetok[1+tokcount:]) - i = 0 - while i < len(mvalue): - if i+1 < len(mvalue): - if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': - del mvalue[i] - continue - elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: - del mvalue[i+1] - i += 1 - m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) - self.macro_prescan(m) - self.macros[name.value] = m - else: - print("Bad macro definition") - except LookupError: - print("Bad macro definition") - - # ---------------------------------------------------------------------- - # undef() - # - # Undefine a macro - # ---------------------------------------------------------------------- - - def undef(self,tokens): - id = tokens[0].value - try: - del self.macros[id] - except LookupError: - pass - - # ---------------------------------------------------------------------- - # parse() - # - # Parse input text. - # ---------------------------------------------------------------------- - def parse(self,input,source=None,ignore={}): - self.ignore = ignore - self.parser = self.parsegen(input,source) - - # ---------------------------------------------------------------------- - # token() - # - # Method to return individual tokens - # ---------------------------------------------------------------------- - def token(self): - try: - while True: - tok = next(self.parser) - if tok.type not in self.ignore: return tok - except StopIteration: - self.parser = None - return None - -if __name__ == '__main__': - import ply.lex as lex - lexer = lex.lex() - - # Run a preprocessor - import sys - f = open(sys.argv[1]) - input = f.read() - - p = Preprocessor(lexer) - p.parse(input,sys.argv[1]) - while True: - tok = p.token() - if not tok: break - print(p.source, tok) - - - - - - - - - - - diff --git a/ply/ply/ctokens.py b/ply/ply/ctokens.py deleted file mode 100644 index dd5f102..0000000 --- a/ply/ply/ctokens.py +++ /dev/null @@ -1,133 +0,0 @@ -# ---------------------------------------------------------------------- -# ctokens.py -# -# Token specifications for symbols in ANSI C and C++. This file is -# meant to be used as a library in other tokenizers. -# ---------------------------------------------------------------------- - -# Reserved words - -tokens = [ - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', - - # Structure dereference (->) - 'ARROW', - - # Ternary operator (?) - 'TERNARY', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) 
- 'ELLIPSIS', -] - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MODULO = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' - -# Increment/decrement -t_INCREMENT = r'\+\+' -t_DECREMENT = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_TERNARY = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers -t_ID = r'[A-Za-z_][A-Za-z0-9_]*' - -# Integer literal -t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_STRING = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comment (C-Style) -def t_COMMENT(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - return t - -# Comment (C++-Style) -def t_CPPCOMMENT(t): - r'//.*\n' - t.lexer.lineno += 1 - return t - - - - - - diff --git a/ply/ply/lex.py b/ply/ply/lex.py deleted file mode 100644 index bd32da9..0000000 --- a/ply/ply/lex.py +++ /dev/null @@ -1,1058 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: lex.py -# -# Copyright (C) 2001-2011, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
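For orientation, the token-list and t_-rule conventions used by ctokens.py above are the standard ply.lex interface implemented by the lex.py module whose removal follows. A minimal sketch of a lexer module driving that interface; the token names and sample input are illustrative only, not taken from the deleted files:

    # Minimal sketch of the ply.lex token-definition convention (illustrative names).
    import ply.lex as lex

    tokens = ('ID', 'ICONST', 'PLUS', 'TIMES')

    # Simple tokens are plain regex strings ...
    t_PLUS  = r'\+'
    t_TIMES = r'\*'
    t_ID    = r'[A-Za-z_][A-Za-z0-9_]*'

    # ... while tokens needing an action are functions whose docstring is the regex.
    def t_ICONST(t):
        r'\d+'
        t.value = int(t.value)
        return t

    t_ignore = ' \t'              # characters skipped between tokens

    def t_newline(t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    def t_error(t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()             # builds the master regex described in lex.py below
    lexer.input("x + 41 * 2")
    for tok in lexer:
        print(tok.type, tok.value, tok.lineno, tok.lexpos)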
-# ----------------------------------------------------------------------------- - -__version__ = "3.4" -__tabversion__ = "3.2" # Version of table file used - -import re, sys, types, copy, os - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# This regular expression is used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Exception thrown when invalid token encountered and no default error -# handler is defined. - -class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s - -# Token class. This class is used to represent the tokens produced. -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) - def __repr__(self): - return str(self) - -# This object is a stand-in for a logging object created by the -# logging module. - -class PlyLogger(object): - def __init__(self,f): - self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") - - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - info = critical - debug = critical - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode - - def clone(self,object=None): - c = copy.copy(self) - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = { } - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = { } - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) - c.lexmodule = object - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): - lextab = tabfile - else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] - - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self,s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self,state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # 
------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self,n): - self.lexpos += n - - # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func,tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if not newtok.type in self.lextokens: - raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: continue - return newtok - - self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError("No input string given with input()") - return None - - # Iterator interface - def __iter__(self): - return self - - def next(self): - t = self.token() - if t is None: - raise StopIteration - return t - - __next__ = next - -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - -def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): - result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. 
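The master-regex approach described in the comment above, and implemented by _form_master_re() just below, amounts to joining every rule into a single alternation of named groups and using the match object to identify which rule fired. A small standalone illustration of that idea, with made-up rule names:

    # Illustrative sketch of the "master regex" technique: one named group per rule.
    import re

    rules = [('t_NUMBER', r'\d+'), ('t_ID', r'[A-Za-z_]\w+'), ('t_PLUS', r'\+')]
    master = re.compile('|'.join('(?P<%s>%s)' % (name, pat) for name, pat in rules))

    pos, data = 0, 'x1 + 42'
    while pos < len(data):
        m = master.match(data, pos)
        if not m:
            pos += 1              # skip characters no rule matches (whitespace here)
            continue
        print(m.lastgroup, m.group())   # which rule fired, and the matched text
        pos = m.end()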
-# ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex,re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) - lexindexnames = lexindexfunc[:] - - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) - else: - lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] - except Exception: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- - -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names) - - tokenname = "_".join(parts[i:]) - return (states,tokenname) - - -# ----------------------------------------------------------------------------- -# LexerReflect() -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. 
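The state-prefixed rule naming decoded by _statetoken() above, and gathered per state by LexerReflect below, is what user modules rely on for conditional lexing. A hedged sketch of such a module, assuming standard ply.lex usage; the 'ccomment' state and all token names are illustrative:

    # Hypothetical sketch of state-prefixed rules: t_ccomment_END applies only in
    # the exclusive 'ccomment' state, unprefixed rules belong to 'INITIAL'.
    import ply.lex as lex

    tokens = ('WORD', 'END')

    states = (('ccomment', 'exclusive'),)   # (statename, 'exclusive' | 'inclusive')

    def t_begin_ccomment(t):                # INITIAL rule: enter the comment state
        r'/\*'
        t.lexer.begin('ccomment')

    def t_ccomment_END(t):                  # active only while in state 'ccomment'
        r'\*/'
        t.lexer.begin('INITIAL')
        return t

    t_ccomment_ignore = ' \t\n'             # per-state ignore set
    def t_ccomment_error(t):                # per-state error rule
        t.lexer.skip(1)

    t_WORD = r'[A-Za-z]+'
    t_ignore = ' \t\n'
    def t_error(t):
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input('abc /* hidden */ def')
    print([(tok.type, tok.value) for tok in lexer])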
-# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): - self.ldict = ldict - self.error_func = None - self.tokens = [] - self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() - return self.error - - # Get the tokens map - def get_tokens(self): - tokens = self.ldict.get("tokens",None) - if not tokens: - self.log.error("No token list is defined") - self.error = 1 - return - - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the literals specifier - def get_literals(self): - self.literals = self.ldict.get("literals","") - - # Validate literals - def validate_literals(self): - try: - for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue - - except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 - - def get_states(self): - self.states = self.ldict.get("states",None) - # Build statemap - if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) - - def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] - - # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state - - for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] - - if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 - return - - for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) - self.toknames[f] = tokname - - if hasattr(t,"__call__"): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 - else: - for s in states: - self.funcsym[s].append((f,t)) - elif isinstance(t, StringTypes): - if tokname == 'ignore': - for s in states: - self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) - - elif tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 - else: - for s in states: - self.strsym[s].append((f,t)) - else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 - - # Sort the functions by line number - for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.stateinfo: - # Validate all rules defined by functions - - - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 - - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = func_code(f).co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 - continue - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - continue - - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error 
= 1 - continue - - try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 - - # Validate all rules defined by strings - for name,r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 - continue - - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 - continue - - try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) - if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 - - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 - - # Validate the error function - efunc = self.errorf.get(state,None) - if efunc: - f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = func_code(f).co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) - - - # ----------------------------------------------------------------------------- - # validate_file() - # - # This checks to see if there are duplicated t_rulename() functions or strings - # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. - # ----------------------------------------------------------------------------- - - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return # Couldn't find the file. Don't worry about it - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) - if not m: - m = sre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - self.log.error("%s:%d: Rule %s redefined. 
Previously defined on line %d",filename,linen,name,prev) - self.error = 1 - linen += 1 - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): - global lexer - ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} - lexobj = Lexer() - lexobj.lexoptimize = optimize - global token,input - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - if debug: - if debuglog is None: - debuglog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the lexer - if object: module = object - - if module: - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = dict(_items) - else: - ldict = get_caller_module_dict(2) - - # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) - linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Dump some basic debugging information - if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) - - # Build a dictionary of valid token names - lexobj.lextokens = { } - for n in linfo.tokens: - lexobj.lextokens[n] = 1 - - # Get literals specification - if isinstance(linfo.literals,(list,tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals - - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo - - regexs = { } - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) - - # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) - - regexs[state] = regex_list - - # Build the master regular expressions - - if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") - - for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names - if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - 
lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - lexobj.lexreflags = reflags - - # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") - - # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) - if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") - - # Check state information for ignore and error rules - for s,stype in stateinfo.items(): - if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) - elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab,outputdir) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None,data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while 1: - tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ - else: - f.__doc__ = r - return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/ply/ply/yacc.py b/ply/ply/yacc.py deleted file mode 100644 index f70439e..0000000 --- a/ply/ply/yacc.py +++ /dev/null @@ -1,3276 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: yacc.py -# -# Copyright (C) 2001-2011, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -__version__ = "3.4" -__tabversion__ = "3.2" # Table version - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized - # implementations of certain functions. - -resultlimit = 40 # Size limit of results when running in debug mode. 
- -pickle_protocol = 0 # Protocol to use when writing pickle files - -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize - -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things -# such as the parser.out file. If a user wants more detailed -# information, they can create their own logging object and pass -# it into PLY. - -class PlyLogger(object): - def __init__(self,f): - self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") - - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - critical = debug - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - -# Exception raised for yacc-related errors -class YaccError(Exception): pass - -# Format the result message that the parser produces when running in debug mode. -def format_result(r): - repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) - return result - - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str - else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
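Grammar rules receive an instance of the YaccProduction wrapper described above (and defined just below) as their single argument: p[n] reads symbol values, p[0] stores the result, and p.lineno(n)/p.lexspan(n) expose the tracking attributes. A minimal self-contained sketch assuming standard ply.lex/ply.yacc usage, with illustrative token and rule names; the keyword arguments mirror the default_lr/yaccdebug/tab_module defaults declared near the top of this file:

    # Hypothetical calculator grammar showing YaccProduction usage inside rules.
    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS', 'TIMES')
    t_PLUS  = r'\+'
    t_TIMES = r'\*'
    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t
    t_ignore = ' '
    def t_error(t):
        t.lexer.skip(1)
    lexer = lex.lex()

    def p_expr_plus(p):
        'expr : expr PLUS term'
        p[0] = p[1] + p[3]          # p[n] is the value of the n-th symbol

    def p_expr_term(p):
        'expr : term'
        p[0] = p[1]

    def p_term_times(p):
        'term : term TIMES factor'
        p[0] = p[1] * p[3]

    def p_term_factor(p):
        'term : factor'
        p[0] = p[1]

    def p_factor_number(p):
        'factor : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print("Syntax error at %r" % (p,))

    # method/debug/tabmodule correspond to default_lr/yaccdebug/tab_module above.
    parser = yacc.yacc(method='LALR', debug=False, write_tables=False)
    print(parser.parse('2 + 3 * 4', lexer=lexer))   # -> 14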
- -class YaccProduction: - def __init__(self,s,stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - - def __setitem__(self,n,v): - self.slice[n].value = v - - def __getslice__(self,i,j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) - - def set_lineno(self,n,lineno): - self.slice[n].lineno = lineno - - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline - - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) - - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos - - def error(self): - raise SyntaxError - - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - -class LRParser: - def __init__(self,lrtab,errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - - def errok(self): - self.errorok = 1 - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - if debug or yaccdevel: - if isinstance(debug,int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) - elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) - else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). - # - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: - # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! 
DEBUG - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = "$end" - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - # --! DEBUG - debug.debug('') - debug.debug('State : %s', state) - # --! DEBUG - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # --! DEBUG - debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - # --! DEBUG - if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) - else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! 
TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG - return result - - if t == None: - - # --! DEBUG - debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. 
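The stack-unwinding logic above is what drives user-level panic-mode recovery: p_error() is invoked on the first offending token, and an 'error' symbol in a grammar rule gives the parser a point to resynchronise on. A hedged, self-contained sketch of that usage; all token and grammar names are illustrative:

    # Hypothetical error-recovery grammar: bad statements are skipped up to ';'.
    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NAME', 'NUMBER', 'EQUALS', 'SEMI')
    t_NAME   = r'[A-Za-z_]\w*'
    t_EQUALS = r'='
    t_SEMI   = r';'
    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t
    t_ignore = ' \n'
    def t_error(t):
        t.lexer.skip(1)
    lexer = lex.lex()

    names = {}

    def p_program(p):
        '''program : program statement
                   | statement'''

    def p_statement_assign(p):
        'statement : NAME EQUALS NUMBER SEMI'
        names[p[1]] = p[3]

    def p_statement_error(p):
        'statement : error SEMI'
        # The parser discards symbols until it can shift 'error', then
        # resynchronises on the next SEMI, as in the recovery loop above.
        print("Discarded a malformed statement")

    def p_error(p):
        if p:
            print("Syntax error at token %s (%r)" % (p.type, p.value))
        else:
            print("Syntax error at end of input")

    parser = yacc.yacc(debug=False, write_tables=False)
    parser.parse('a = 1; b = = 2; c = 3;', lexer=lexer)
    print(names)   # 'a' and 'c' survive; the malformed statement is skipped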
- - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). - # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - -# ----------------------------------------------------------------------------- -# === Grammar Representation === -# -# The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. -# ----------------------------------------------------------------------------- - -import re - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# A grammar rule refers to a specification such as this: -# -# expr : expr PLUS term -# -# Here are the basic attributes defined on all productions -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedence level -# number - Production number. -# func - Function that executes on reduce -# file - File where production function is defined -# lineno - Line number where production function is defined -# -# The following attributes are defined or optional. 
-# -# len - Length of the production (number of symbols on right hand side) -# usyms - Set of unique symbols found in the production -# ----------------------------------------------------------------------------- - -class Production(object): - reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func - self.callable = None - self.file = file - self.line = line - self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production - self.usyms = [ ] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production - self.lr_items = [] - self.lr_next = None - - # Create a string representation - if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) - else: - self.str = "%s -> " % self.name - - def __str__(self): - return self.str - - def __repr__(self): - return "Production("+str(self)+")" - - def __len__(self): - return len(self.prod) - - def __nonzero__(self): - return 1 - - def __getitem__(self,index): - return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - - # Precompute the list of productions immediately following. Hack. Remove later - try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): - p.lr_after = [] - try: - p.lr_before = p.prod[n-1] - except IndexError: - p.lr_before = None - - return p - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - def __str__(self): - return self.str - def __repr__(self): - return "MiniProduction(%s)" % self.str - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - - -# ----------------------------------------------------------------------------- -# class LRItem -# -# This class represents a specific stage of parsing a production rule. For -# example: -# -# expr : expr . PLUS term -# -# In the above, the "." represents the current location of the parse. Here -# basic attributes: -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] -# number - Production number. -# -# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' -# then lr_next refers to 'expr -> expr PLUS . term' -# lr_index - LR item index (location of the ".") in the prod list. 
-# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# lr_after - List of all productions that immediately follow -# lr_before - Grammar symbol immediately before -# ----------------------------------------------------------------------------- - -class LRItem(object): - def __init__(self,p,n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) - else: - s = "%s -> " % self.name - return s - - def __repr__(self): - return "LRItem("+str(self)+")" - -# ----------------------------------------------------------------------------- -# rightmost_terminal() -# -# Return the rightmost terminal from a list of symbols. Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 - return None - -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - -class GrammarError(YaccError): pass - -class Grammar(object): - def __init__(self,terminals): - self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - self.Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. - - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - for term in terminals: - self.Terminals[term] = [] - - self.Terminals['error'] = [] - - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - self.First = { } # A dictionary of precomputed FIRST(x) symbols - - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols - - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. - - self.Start = None # Starting symbol for the grammar - - - def __len__(self): - return len(self.Productions) - - def __getitem__(self,index): - return self.Productions[index] - - # ----------------------------------------------------------------------------- - # set_precedence() - # - # Sets the precedence for a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. 
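# Illustrative sketch (not part of the deleted file): what eventually feeds
# set_precedence().  In a user grammar module the table below is declared as
# a plain tuple; roughly, every terminal in a row ends up in a
# set_precedence(term, assoc, level) call with level = row index + 1.
# Token names (PLUS, TIMES, UMINUS, ...) are hypothetical.
precedence = (
    ('left',  'PLUS', 'MINUS'),      # level 1, binds loosest
    ('left',  'TIMES', 'DIVIDE'),    # level 2
    ('right', 'UMINUS'),             # level 3, binds tightest
)
# A rule can borrow a level explicitly with %prec, e.g.:
#     expression : MINUS expression %prec UMINUS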
- # - # ----------------------------------------------------------------------------- - - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" - if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: - raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - - # ----------------------------------------------------------------------------- - # add_production() - # - # Given an action function, this function assembles a production rule and - # computes its precedence level. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # Precedence is determined by the precedence of the right-most non-terminal - # or the precedence of a terminal specified by %prec. - # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - - def add_production(self,prodname,syms,func=None,file='',line=0): - - if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) - if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) - if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) - - # Look for literal tokens - for n,s in enumerate(syms): - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - - # Determine the precedence level - if '%prec' in syms: - if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) - if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) - precname = syms[-1] - prodprec = self.Precedence.get(precname,None) - if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) - else: - self.UsedPrecedence[precname] = 1 - del syms[-2:] # Drop %prec from the rule - else: - # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) - if map in self.Prodmap: - m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) - - # From this point on, everything is valid. 
Create a new Production instance - pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] - - # Add the production number to Terminals and Nonterminals - for t in syms: - if t in self.Terminals: - self.Terminals[t].append(pnumber) - else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] - self.Nonterminals[t].append(pnumber) - - # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) - self.Productions.append(p) - self.Prodmap[map] = p - - # Add to the global productions list - try: - self.Prodnames[prodname].append(p) - except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 - - # ----------------------------------------------------------------------------- - # set_start() - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self,start=None): - if not start: - start = self.Productions[1].name - if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) - self.Nonterminals[start].append(0) - self.Start = start - - # ----------------------------------------------------------------------------- - # find_unreachable() - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. - # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. - return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): - for r in p.prod: - mark_reachable_from(r) - - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 - - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - - # ----------------------------------------------------------------------------- - # infinite_cycles() - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). - # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # Terminals: - for t in self.Terminals: - terminates[t] = 1 - - terminates['$end'] = 1 - - # Nonterminals: - - # Initialize to false: - for n in self.Nonterminals: - terminates[n] = 0 - - # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = 0 - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = 1 - - if p_terminates: - # symbol n terminates! - if not terminates[n]: - terminates[n] = 1 - some_change = 1 - # Don't need to consider any more productions for this n. 
- break - - if not some_change: - break - - infinite = [] - for (s,term) in terminates.items(): - if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - infinite.append(s) - - return infinite - - - # ----------------------------------------------------------------------------- - # undefined_symbols() - # - # Find all symbols that were used the grammar, but not defined as tokens or - # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. - # ----------------------------------------------------------------------------- - def undefined_symbols(self): - result = [] - for p in self.Productions: - if not p: continue - - for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) - return result - - # ----------------------------------------------------------------------------- - # unused_terminals() - # - # Find all terminals that were defined, but not used by the grammar. Returns - # a list of all symbols. - # ----------------------------------------------------------------------------- - def unused_terminals(self): - unused_tok = [] - for s,v in self.Terminals.items(): - if s != 'error' and not v: - unused_tok.append(s) - - return unused_tok - - # ------------------------------------------------------------------------------ - # unused_rules() - # - # Find all grammar rules that were defined, but not used (maybe not reachable) - # Returns a list of productions. - # ------------------------------------------------------------------------------ - - def unused_rules(self): - unused_prod = [] - for s,v in self.Nonterminals.items(): - if not v: - p = self.Prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # unused_precedence() - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. - # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.Precedence: - if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - - return unused - - # ------------------------------------------------------------------------- - # _first() - # - # Compute the value of FIRST1(beta) where beta is a tuple of symbols. - # - # During execution of compute_first1, the result may be incomplete. - # Afterward (e.g., when called from compute_follow()), it will be complete. - # ------------------------------------------------------------------------- - def _first(self,beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [ ] - for x in beta: - x_produces_empty = 0 - - # Add all the non- symbols of First[x] to the result. - for f in self.First[x]: - if f == '': - x_produces_empty = 1 - else: - if f not in result: result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. 
- break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('') - - return result - - # ------------------------------------------------------------------------- - # compute_first() - # - # Compute the value of FIRST1(X) for all symbols - # ------------------------------------------------------------------------- - def compute_first(self): - if self.First: - return self.First - - # Terminals: - for t in self.Terminals: - self.First[t] = [t] - - self.First['$end'] = ['$end'] - - # Nonterminals: - - # Initialize to the empty set: - for n in self.Nonterminals: - self.First[n] = [] - - # Then propagate symbols until no change: - while 1: - some_change = 0 - for n in self.Nonterminals: - for p in self.Prodnames[n]: - for f in self._first(p.prod): - if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 - if not some_change: - break - - return self.First - - # --------------------------------------------------------------------- - # compute_follow() - # - # Computes all of the follow sets for every non-terminal symbol. The - # follow set is the set of all symbols that might follow a given - # non-terminal. See the Dragon book, 2nd Ed. p. 189. - # --------------------------------------------------------------------- - def compute_follow(self,start=None): - # If already computed, return the result - if self.Follow: - return self.Follow - - # If first sets not computed yet, do that first. - if not self.First: - self.compute_first() - - # Add '$end' to the follow list of the start symbol - for k in self.Nonterminals: - self.Follow[k] = [ ] - - if not start: - start = self.Productions[1].name - - self.Follow[start] = [ '$end' ] - - while 1: - didadd = 0 - for p in self.Productions[1:]: - # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] - if B in self.Nonterminals: - # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i+1:]) - hasempty = 0 - for f in fst: - if f != '' and f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = 1 - if f == '': - hasempty = 1 - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = 1 - if not didadd: break - return self.Follow - - - # ----------------------------------------------------------------------------- - # build_lritems() - # - # This function walks the list of productions and builds a complete set of the - # LR items. The LR items are stored in two ways: First, they are uniquely - # numbered and placed in the list _lritems. Second, a linked list of LR items - # is built for each production. For example: - # - # E -> E PLUS E - # - # Creates the list - # - # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] - # ----------------------------------------------------------------------------- - - def build_lritems(self): - for p in self.Productions: - lastlri = p - i = 0 - lr_items = [] - while 1: - if i > len(p): - lri = None - else: - lri = LRItem(p,i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i-1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri - if not lri: break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = lr_items - -# ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. -# ----------------------------------------------------------------------------- - -class VersionError(YaccError): pass - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self,module): - if isinstance(module,types.ModuleType): - parsetab = module - else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] - - if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self,filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - in_f = open(filename,"rb") - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): - for p in self.lr_productions: - p.bind(pdict) - -# ----------------------------------------------------------------------------- -# === LR Generator === -# -# The following classes and functions are used to generate LR parsing tables on -# a grammar. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. 
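# Illustrative sketch (not part of the deleted file): the set-valued fixpoint
#     F(x) = F'(x) U union{ F(y) | x R y }
# that digraph()/traverse() below compute efficiently with a stack-based
# traversal, shown here as a deliberately naive iteration on a toy relation.
def naive_fixpoint(X, R, FP):
    F = {x: set(FP(x)) for x in X}
    changed = True
    while changed:                       # grow F until nothing changes
        changed = False
        for x in X:
            for y in R(x):
                if not F[y] <= F[x]:
                    F[x] |= F[y]         # fold F(y) into F(x) whenever x R y
                    changed = True
    return F

# Toy data: A relates to B, B relates to C; base sets hold one element each.
X = ['A', 'B', 'C']
R = lambda x: {'A': ['B'], 'B': ['C'], 'C': []}[x]
FP = lambda x: {'A': ['a'], 'B': ['b'], 'C': ['c']}[x]
print(naive_fixpoint(X, R, FP))          # A -> {a,b,c}, B -> {b,c}, C -> {c}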
-# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - -def digraph(X,R,FP): - N = { } - for x in X: - N[x] = 0 - stack = [] - F = { } - for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) - return F - -def traverse(x,N,stack,F,X,R,FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) - if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - -class LALRError(YaccError): pass - -# ----------------------------------------------------------------------------- -# == LRGeneratedTable == -# -# This class implements the LR table generation algorithm. There are no -# public methods except for write() -# ----------------------------------------------------------------------------- - -class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) - - self.grammar = grammar - self.lr_method = method - - # Set up the logger - if not log: - log = NullLogger() - self.log = log - - # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures - - self._add_count = 0 # Internal counter used to detect cycles - - # Diagonistic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts - - self.sr_conflicts = [] - self.rr_conflicts = [] - - # Build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() - self.lr_parse_table() - - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - - def lr0_closure(self,I): - self._add_count += 1 - - # Add everything in I to J - J = I[:] - didadd = 1 - while didadd: - didadd = 0 - for j in J: - for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = 1 - - return J - - # Compute the LR(0) goto function goto(I,X) where I is a set - # of LR(0) items and X is a grammar symbol. This function is written - # in a way that guarantees uniqueness of the generated goto sets - # (i.e. the same goto set will never be returned as two different Python - # objects). With uniqueness, we can later do fast set comparisons using - # id(obj) instead of element-wise comparison. 
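# Illustrative sketch (not part of the deleted file): the closure and goto
# operations that lr0_closure() above and lr0_goto() just below implement,
# redone with plain tuples on a toy grammar.  An item is (lhs, rhs, dot),
# where dot is the position of the "." in the right-hand side.
TOY_GRAMMAR = {
    "S'": [("E",)],                      # augmented start production
    "E":  [("E", "+", "T"), ("T",)],
    "T":  [("id",)],
}

def toy_closure(items):
    items = set(items)
    changed = True
    while changed:
        changed = False
        for (lhs, rhs, dot) in list(items):
            if dot < len(rhs) and rhs[dot] in TOY_GRAMMAR:   # dot sits before a nonterminal
                for prod in TOY_GRAMMAR[rhs[dot]]:
                    item = (rhs[dot], prod, 0)
                    if item not in items:
                        items.add(item)
                        changed = True
    return items

def toy_goto(items, symbol):
    moved = {(lhs, rhs, dot + 1)
             for (lhs, rhs, dot) in items
             if dot < len(rhs) and rhs[dot] == symbol}
    return toy_closure(moved)

I0 = toy_closure({("S'", ("E",), 0)})
print(sorted(I0))                        # S'->.E, E->.E+T, E->.T, T->.id
print(sorted(toy_goto(I0, "E")))         # S'->E.  and  E->E.+T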
- - def lr0_goto(self,I,x): - # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = self.lr_goto_cache.get(x,None) - if not s: - s = { } - self.lr_goto_cache[x] = s - - gs = [ ] - for p in I: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n),None) - if not s1: - s1 = { } - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end',None) - if not g: - if gs: - g = self.lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g - return g - - # Compute the LR(0) sets of item function - def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] - i = 0 - for I in C: - self.lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue - self.lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - - # ----------------------------------------------------------------------------- - # ==== LALR(1) Parsing ==== - # - # LALR(1) parsing is almost exactly the same as SLR except that instead of - # relying upon Follow() sets when performing reductions, a more selective - # lookahead set that incorporates the state of the LR(0) machine is utilized. - # Thus, we mainly just have to focus on calculating the lookahead sets. - # - # The method used here is due to DeRemer and Pennelo (1982). - # - # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) - # Lookahead Sets", ACM Transactions on Programming Languages and Systems, - # Vol. 4, No. 4, Oct. 1982, pp. 615-649 - # - # Further details can also be found in: - # - # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", - # McGraw-Hill Book Company, (1985). - # - # ----------------------------------------------------------------------------- - - # ----------------------------------------------------------------------------- - # compute_nullable_nonterminals() - # - # Creates a dictionary containing all of the non-terminals that might produce - # an empty production. - # ----------------------------------------------------------------------------- - - def compute_nullable_nonterminals(self): - nullable = {} - num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 - continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) - return nullable - - # ----------------------------------------------------------------------------- - # find_nonterminal_trans(C) - # - # Given a set of LR(0) items, this functions finds all of the non-terminal - # transitions. These are transitions in which a dot appears immediately before - # a non-terminal. Returns a list of tuples of the form (state,N) where state - # is the state number and N is the nonterminal symbol. - # - # The input C is the set of LR(0) items. 
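# Illustrative sketch (not part of the deleted file): the fixpoint behind
# compute_nullable_nonterminals() above, on a toy grammar where A derives the
# empty string directly and B derives it only through A.
toy_productions = {
    "A": [()],                 # A -> <empty>
    "B": [("A", "A")],         # B -> A A
    "C": [("id",)],            # C -> id, can never be empty
}
nullable = set()
changed = True
while changed:
    changed = False
    for name, prods in toy_productions.items():
        if name in nullable:
            continue
        # A nonterminal is nullable if some production has only nullable symbols.
        if any(all(sym in nullable for sym in prod) for prod in prods):
            nullable.add(name)
            changed = True
print(sorted(nullable))        # ['A', 'B']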
- # ----------------------------------------------------------------------------- - - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans - - # ----------------------------------------------------------------------------- - # dr_relation() - # - # Computes the DR(p,A) relationships for non-terminal transitions. The input - # is a tuple (state,N) where state is a number and N is a nonterminal symbol. - # - # Returns a list of terminals. - # ----------------------------------------------------------------------------- - - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans - terms = [] - - g = self.lr0_goto(C[state],N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') - - return terms - - # ----------------------------------------------------------------------------- - # reads_relation() - # - # Computes the READS() relation (p,A) READS (t,C). - # ----------------------------------------------------------------------------- - - def reads_relation(self,C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) - - return rel - - # ----------------------------------------------------------------------------- - # compute_lookback_includes() - # - # Determines the lookback and includes relations - # - # LOOKBACK: - # - # This relation is determined by running the LR(0) state machine forward. - # For example, starting with a production "N : . A B C", we run it forward - # to obtain "N : A B C ." We then build a relationship between this final - # state and the starting state. These relationships are stored in a dictionary - # lookdict. - # - # INCLUDES: - # - # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). - # - # This relation is used to determine non-terminal transitions that occur - # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # if the following holds: - # - # B -> LAT, where T -> epsilon and p' -L-> p - # - # L is essentially a prefix (which may be empty), T is a suffix that must be - # able to derive an empty string. State p' must lead to state p with the string L. - # - # ----------------------------------------------------------------------------- - - def compute_lookback_includes(self,C,trans,nullable): - - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state,N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . 
on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) - for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb - - return lookdict,includedict - - # ----------------------------------------------------------------------------- - # compute_read_sets() - # - # Given a set of LR(0) items, this function computes the read sets. - # - # Inputs: C = Set of LR(0) items - # ntrans = Set of nonterminal transitions - # nullable = Set of empty transitions - # - # Returns a set containing the read sets - # ----------------------------------------------------------------------------- - - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) - return F - - # ----------------------------------------------------------------------------- - # compute_follow_sets() - # - # Given a set of LR(0) items, a set of non-terminal transitions, a readset, - # and an include set, this function computes the follow sets - # - # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} - # - # Inputs: - # ntrans = Set of nonterminal transitions - # readsets = Readset (previously computed) - # inclsets = Include sets (previously computed) - # - # Returns a set containing the follow sets - # ----------------------------------------------------------------------------- - - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F - - # ----------------------------------------------------------------------------- - # add_lookaheads() - # - # Attaches the lookahead symbols to grammar rules. 
- # - # Inputs: lookbacks - Set of lookback relations - # followset - Computed follow set - # - # This function directly attaches the lookaheads to productions contained - # in the lookbacks set - # ----------------------------------------------------------------------------- - - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): - # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) - - # ----------------------------------------------------------------------------- - # add_lalr_lookaheads() - # - # This function does all of the work of adding lookahead information for use - # with LALR parsing - # ----------------------------------------------------------------------------- - - def add_lalr_lookaheads(self,C): - # Determine all of the nullable nonterminals - nullable = self.compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(C) - - # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) - - # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) - - # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) - - # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) - - # ----------------------------------------------------------------------------- - # lr_parse_table() - # - # This function constructs the parse tables for SLR or LALR - # ----------------------------------------------------------------------------- - def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output - - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) - - # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items - # This determines the number of states - - C = self.lr0_items() - - if self.lr_method == 'LALR': - self.add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") - for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") - - for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp,rejectp = pp,oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = j - st_actionp[a] = p - - # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") - # Print the actions that were not used. (debugging) - not_used = 0 - for a,p,m in actlist: - if a in st_action: - if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! 
%-15s [ %s ]",a,m) - not_used = 1 - _actprint[(a,m)] = 1 - if not_used: - log.debug("") - - # Construct the goto table for this state - - nkeys = { } - for ii in I: - for s in ii.usyms: - if s in self.grammar.Nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) - - action[st] = st_action - actionp[st] = st_actionp - goto[st] = st_goto - st += 1 - - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. -_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in self.lr_productions: - if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) - else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") - f.close() - - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return - - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # ----------------------------------------------------------------------------- - - def pickle_table(self,filename,signature=""): - try: - 
import cPickle as pickle - except ImportError: - import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() - -# ----------------------------------------------------------------------------- -# === INTROSPECTION === -# -# The following functions and classes are used to implement the PLY -# introspection features followed by the yacc() function itself. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - -def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# parse_grammar() -# -# This takes a raw grammar rule string and parses it into production data -# ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): - grammar = [] - # Split the doc string into lines - pstrings = doc.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) - - grammar.append((file,dline,prodname,syms)) - except SyntaxError: - raise - except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) - - return grammar - -# ----------------------------------------------------------------------------- -# ParserReflect() -# -# This class represents information extracted for building a parser including -# start symbol, error function, tokens, precedence list, action functions, -# etc. 
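# Illustrative sketch (not part of the deleted file): a minimal grammar module
# of the kind ParserReflect introspects.  It supplies the attributes collected
# by get_start()/get_error_func()/get_tokens()/get_precedence() and the p_*()
# rule functions whose docstrings parse_grammar() splits on ':' and '|'.  The
# example assumes the standard "ply" package is importable; the copy bundled
# in this repository may be wired up differently.
import ply.lex as lex
import ply.yacc as yacc

tokens = ('NUMBER', 'PLUS', 'TIMES')

t_PLUS = r'\+'
t_TIMES = r'\*'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    t.lexer.skip(1)

precedence = (
    ('left', 'PLUS'),
    ('left', 'TIMES'),
)

start = 'expr'                           # picked up by get_start()

def p_expr_binop(p):
    '''expr : expr PLUS expr
            | expr TIMES expr'''
    p[0] = p[1] + p[3] if p[2] == '+' else p[1] * p[3]

def p_expr_number(p):
    'expr : NUMBER'
    p[0] = p[1]

def p_error(p):                          # found via get_error_func()
    print("Syntax error at %r" % (p,))

if __name__ == '__main__':
    lex.lex()
    parser = yacc.yacc()
    print(parser.parse("2+3*4"))         # prints 14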
-# ----------------------------------------------------------------------------- -class ParserReflect(object): - def __init__(self,pdict,log=None): - self.pdict = pdict - self.start = None - self.error_func = None - self.tokens = None - self.files = {} - self.grammar = [] - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_start() - self.get_error_func() - self.get_tokens() - self.get_precedence() - self.get_pfunctions() - - # Validate all of the information - def validate_all(self): - self.validate_start() - self.validate_error_func() - self.validate_tokens() - self.validate_precedence() - self.validate_pfunctions() - self.validate_files() - return self.error - - # Compute a signature over the grammar - def signature(self): - try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() - if self.start: - sig.update(self.start.encode('latin-1')) - if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) - if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) - for f in self.pfuncs: - if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): - pass - return sig.digest() - - # ----------------------------------------------------------------------------- - # validate_file() - # - # This method checks to see if there are duplicated p_rulename() functions - # in the parser module file. Without this function, it is really easy for - # users to make mistakes by cutting and pasting code fragments (and it's a real - # bugger to try and figure out why the resulting parser doesn't work). Therefore, - # we just do a little regular expression pattern matching of def statements - # to try and detect duplicates. - # ----------------------------------------------------------------------------- - - def validate_files(self): - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - continue - - counthash = { } - for linen,l in enumerate(lines): - linen += 1 - m = fre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - self.log.warning("%s:%d: Function %s redefined. 
Previously defined on line %d", filename,linen,name,prev) - - # Get the start symbol - def get_start(self): - self.start = self.pdict.get('start') - - # Validate the start symbol - def validate_start(self): - if self.start is not None: - if not isinstance(self.start,str): - self.log.error("'start' must be a string") - - # Look for error handler - def get_error_func(self): - self.error_func = self.pdict.get('p_error') - - # Validate the error function - def validate_error_func(self): - if self.error_func: - if isinstance(self.error_func,types.FunctionType): - ismethod = 0 - elif isinstance(self.error_func, types.MethodType): - ismethod = 1 - else: - self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 - return - - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 - - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 - - # Get the tokens map - def get_tokens(self): - tokens = self.pdict.get("tokens",None) - if not tokens: - self.log.error("No token list is defined") - self.error = 1 - return - - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - # Validate the tokens. - if 'error' in self.tokens: - self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = 1 - return - - terminals = {} - for n in self.tokens: - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the precedence map (if any) - def get_precedence(self): - self.prec = self.pdict.get("precedence",None) - - # Validate and parse the precedence map - def validate_precedence(self): - preclist = [] - if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 - return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 - return - - if len(p) < 2: - self.log.error("Malformed precedence entry %s. 
Must be (assoc, term, ..., term)",p) - self.error = 1 - return - assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 - return - for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 - return - preclist.append((term,assoc,level+1)) - self.preclist = preclist - - # Get all p_functions from the grammar - def get_pfunctions(self): - p_functions = [] - for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() - self.pfuncs = p_functions - - - # Validate all of the p_functions - def validate_pfunctions(self): - grammar = [] - # Check for non-empty symbols - if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: - func = self.pdict[name] - if isinstance(func, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 - elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) - else: - try: - parsed_g = parse_grammar(doc,file,line) - for g in parsed_g: - grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] - self.log.error(str(e)) - self.error = 1 - - # Looks like a valid grammar rule - # Mark the file in which defined. - self.files[file] = 1 - - # Secondary validation step that looks for p_ definitions that are not functions - # or functions that look like they might be grammar rules. 
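# As a rough, hypothetical illustration of the second case: a one-argument
# function whose docstring reads like a production, but whose name lacks the
# p_ prefix (the name expression_binop below is made up), would trigger the
# "Possible grammar rule ... defined without p_ prefix" warning emitted by the
# loop that follows:
#
#     def expression_binop(p):
#         'expression : expression PLUS expression'
#         p[0] = p[1] + p[3]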
- - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass - - self.grammar = grammar - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build a parser -# ----------------------------------------------------------------------------- - -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): - - global parse # Reference to the parsing method of the last built parser - - # If pickling is enabled, table files are not created - - if picklefile: - write_tables = 0 - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the parser - if module: - _items = [(k,getattr(module,k)) for k in dir(module)] - pdict = dict(_items) - else: - pdict = get_caller_module_dict(2) - - # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) - pinfo.get_all() - - if pinfo.error: - raise YaccError("Unable to build parser") - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - parse = parser.parse - return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() - errorlog.warning(str(e)) - except Exception: - pass - - if debuglog is None: - if debug: - debuglog = PlyLogger(open(debugfile,"w")) - else: - debuglog = NullLogger() - - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) - - - errors = 0 - - # Validate the parser information - if pinfo.validate_all(): - raise YaccError("Unable to build parser") - - if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") - - # Create a grammar object - grammar = Grammar(pinfo.tokens) - - # Set precedence level for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 - - # Set the grammar start symbols - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error(str(e)) - errors = 1 - - if errors: - raise YaccError("Unable to build parser") - - # Verify 
the grammar structure - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 - - unused_terminals = grammar.unused_terminals() - if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") - for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) - - # Print out all productions to the debug log - if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) - - # Find unused non-terminals - unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) - - if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") - if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") - if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) - - if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") - terms = list(grammar.Terminals) - terms.sort() - for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") - nonterms = list(grammar.Nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") - - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 - - if errors: - raise YaccError("Unable to build parser") - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) - - if debug: - num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") - elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") - elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) - - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} - for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: - continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state 
%d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) - warned_never.append(rejected) - - # Write the table file if requested - if write_tables: - lr.write_table(tabmodule,outputdir,signature) - - # Write a pickled version of the tables - if picklefile: - lr.pickle_table(picklefile,signature) - - # Build the parser - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - - parse = parser.parse - return parser diff --git a/ply/setup.py b/ply/setup.py deleted file mode 100755 index 408d5b8..0000000 --- a/ply/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/python -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - -setup(name = "ply", - description="Python Lex & Yacc", - long_description = """ -PLY is yet another implementation of lex and yacc for Python. Some notable -features include the fact that its implemented entirely in Python and it -uses LALR(1) parsing which is efficient and well suited for larger grammars. - -PLY provides most of the standard lex/yacc features including support for empty -productions, precedence rules, error recovery, and support for ambiguous grammars. - -PLY is extremely easy to use and provides very extensive error checking. -It is compatible with both Python 2 and Python 3. -""", - license="""BSD""", - version = "3.4", - author = "David Beazley", - author_email = "dave@dabeaz.com", - maintainer = "David Beazley", - maintainer_email = "dave@dabeaz.com", - url = "http://www.dabeaz.com/ply/", - packages = ['ply'], - classifiers = [ - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 2', - ] - ) diff --git a/ply/test/README b/ply/test/README deleted file mode 100644 index dc74ba3..0000000 --- a/ply/test/README +++ /dev/null @@ -1,7 +0,0 @@ -This directory mostly contains tests for various types of error -conditions. To run: - - $ python testlex.py . - $ python testyacc.py . - -The script 'cleanup.sh' cleans up this directory to its original state. diff --git a/ply/test/calclex.py b/ply/test/calclex.py deleted file mode 100644 index 67d245f..0000000 --- a/ply/test/calclex.py +++ /dev/null @@ -1,49 +0,0 @@ -# ----------------------------------------------------------------------------- -# calclex.py -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex() - - - diff --git a/ply/test/cleanup.sh b/ply/test/cleanup.sh deleted file mode 100755 index 9374f2c..0000000 --- a/ply/test/cleanup.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ - diff --git a/ply/test/lex_closure.py b/ply/test/lex_closure.py deleted file mode 100644 index 30ee679..0000000 --- a/ply/test/lex_closure.py +++ /dev/null @@ -1,54 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_closure.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -def make_calc(): - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Build the lexer - return lex.lex() - -make_calc() -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_doc1.py b/ply/test/lex_doc1.py deleted file mode 100644 index 8a2bfcc..0000000 --- a/ply/test/lex_doc1.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_doc1.py -# -# Missing documentation string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - pass - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_dup1.py b/ply/test/lex_dup1.py deleted file mode 100644 index fd04cdb..0000000 --- a/ply/test/lex_dup1.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_dup1.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_NUMBER = r'\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_dup2.py b/ply/test/lex_dup2.py deleted file mode 100644 index 870e5e7..0000000 --- a/ply/test/lex_dup2.py +++ /dev/null @@ -1,33 +0,0 @@ -# lex_dup2.py -# -# Duplicated rule specifiers - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - r'\d+' - pass - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_dup3.py b/ply/test/lex_dup3.py deleted file mode 100644 index 94b5592..0000000 --- a/ply/test/lex_dup3.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_dup3.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_empty.py b/ply/test/lex_empty.py deleted file mode 100644 index e0368bf..0000000 --- a/ply/test/lex_empty.py +++ /dev/null @@ -1,20 +0,0 @@ -# lex_empty.py -# -# No rules defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - - -lex.lex() - - diff --git a/ply/test/lex_error1.py b/ply/test/lex_error1.py deleted file mode 100644 index 4508a80..0000000 --- a/ply/test/lex_error1.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_error1.py -# -# Missing t_error() rule - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - - - -lex.lex() - - diff --git a/ply/test/lex_error2.py b/ply/test/lex_error2.py deleted file mode 100644 index 8040d39..0000000 --- a/ply/test/lex_error2.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_error2.py -# -# t_error defined, but not function - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_error = "foo" - - - -lex.lex() - - diff --git a/ply/test/lex_error3.py b/ply/test/lex_error3.py deleted file mode 100644 index 1feefb6..0000000 --- a/ply/test/lex_error3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error3.py -# -# t_error defined as function, but with wrong # args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_error4.py b/ply/test/lex_error4.py deleted file mode 100644 index f4f48db..0000000 --- a/ply/test/lex_error4.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error4.py -# -# t_error defined as function, but too many args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t,s): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_hedit.py b/ply/test/lex_hedit.py deleted file mode 100644 index 34f15a1..0000000 --- a/ply/test/lex_hedit.py +++ /dev/null @@ -1,47 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters. 
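# (For example, 3Habc denotes the three characters "abc"; the sample input fed
#  to the lexer at the bottom of this file is "3Habc 10Habcdefghij 2Hxy".)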
-# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex() -lex.runmain(data="3Habc 10Habcdefghij 2Hxy") - - - diff --git a/ply/test/lex_ignore.py b/ply/test/lex_ignore.py deleted file mode 100644 index 6c43b4c..0000000 --- a/ply/test/lex_ignore.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_ignore.py -# -# Improperly specific ignore declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_ignore(t): - ' \t' - pass - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/ply/test/lex_ignore2.py b/ply/test/lex_ignore2.py deleted file mode 100644 index f60987a..0000000 --- a/ply/test/lex_ignore2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_ignore2.py -# -# ignore declaration as a raw string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = r' \t' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_literal1.py b/ply/test/lex_literal1.py deleted file mode 100644 index db389c3..0000000 --- a/ply/test/lex_literal1.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal1.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = ["+","-","**"] - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_literal2.py b/ply/test/lex_literal2.py deleted file mode 100644 index b50b92c..0000000 --- a/ply/test/lex_literal2.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal2.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = 23 - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_many_tokens.py b/ply/test/lex_many_tokens.py deleted file mode 100644 index 77ae12b..0000000 --- a/ply/test/lex_many_tokens.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_many_tokens.py -# -# Test lex's ability to handle a large number of tokens (beyond the -# 100-group limit of the re module) - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ["TOK%d" % i for i in range(1000)] - -for tok in tokens: - if sys.version_info[0] < 3: - exec("t_%s = '%s:'" % (tok,tok)) - else: - exec("t_%s = '%s:'" % (tok,tok), globals()) - -t_ignore = " \t" - -def t_error(t): - pass - -lex.lex(optimize=1,lextab="manytab") -lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:") - - diff --git a/ply/test/lex_module.py b/ply/test/lex_module.py deleted file mode 100644 index 8bdd3ed..0000000 --- a/ply/test/lex_module.py +++ /dev/null @@ -1,10 +0,0 @@ -# lex_module.py -# - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex -import lex_module_import -lex.lex(module=lex_module_import) -lex.runmain(data="3+4") diff --git a/ply/test/lex_module_import.py b/ply/test/lex_module_import.py deleted file mode 100644 index df42082..0000000 --- a/ply/test/lex_module_import.py +++ /dev/null @@ -1,42 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_module_import.py -# -# A lexer defined in a module, but built in lex_module.py -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - diff --git a/ply/test/lex_object.py b/ply/test/lex_object.py deleted file mode 100644 index 7e9f389..0000000 --- a/ply/test/lex_object.py +++ /dev/null @@ -1,55 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_object.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -class CalcLexer: - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self,t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(self,t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(self,t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - -calc = CalcLexer() - -# Build the lexer -lex.lex(object=calc) -lex.runmain(data="3+4") - - - - diff --git a/ply/test/lex_opt_alias.py b/ply/test/lex_opt_alias.py deleted file mode 100644 index 5d5ed4c..0000000 --- a/ply/test/lex_opt_alias.py +++ /dev/null @@ -1,54 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_opt_alias.py -# -# Tests ability to match up functions with states, aliases, and -# lexing tables. -# ----------------------------------------------------------------------------- - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -tokens = ( - 'NAME','NUMBER', - ) - -states = (('instdef','inclusive'),('spam','exclusive')) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -def t_instdef_spam_BITS(t): - r'[01-]+' - return t - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ANY_NUMBER = NUMBER - -t_ignore = " \t" -t_spam_ignore = t_ignore - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -t_spam_error = t_error - -# Build the lexer -import ply.lex as lex -lex.lex(optimize=1,lextab="aliastab") -lex.runmain(data="3+4") diff --git a/ply/test/lex_optimize.py b/ply/test/lex_optimize.py deleted file mode 100644 index 0e447e6..0000000 --- a/ply/test/lex_optimize.py +++ /dev/null @@ -1,50 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1) -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_optimize2.py b/ply/test/lex_optimize2.py deleted file mode 100644 index 64555f6..0000000 --- a/ply/test/lex_optimize2.py +++ /dev/null @@ -1,50 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize2.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1,lextab="opt2tab") -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_optimize3.py b/ply/test/lex_optimize3.py deleted file mode 100644 index c6c8cce..0000000 --- a/ply/test/lex_optimize3.py +++ /dev/null @@ -1,52 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize3.py -# -# Writes table in a subdirectory structure. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1,lextab="lexdir.sub.calctab",outputdir="lexdir/sub") -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_re1.py b/ply/test/lex_re1.py deleted file mode 100644 index 5be7aef..0000000 --- a/ply/test/lex_re1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re1.py -# -# Bad regular expression in a string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_re2.py b/ply/test/lex_re2.py deleted file mode 100644 index 8dfb8e3..0000000 --- a/ply/test/lex_re2.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re2.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+?' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_re3.py b/ply/test/lex_re3.py deleted file mode 100644 index e179925..0000000 --- a/ply/test/lex_re3.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_re3.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "POUND", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' -t_POUND = r'#' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule1.py b/ply/test/lex_rule1.py deleted file mode 100644 index 0406c6f..0000000 --- a/ply/test/lex_rule1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule1.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = 1 - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule2.py b/ply/test/lex_rule2.py deleted file mode 100644 index 1c29d87..0000000 --- a/ply/test/lex_rule2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_rule2.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(): - r'\d+' - return t - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule3.py b/ply/test/lex_rule3.py deleted file mode 100644 index 9ea94da..0000000 --- a/ply/test/lex_rule3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule3.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t,s): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_state1.py b/ply/test/lex_state1.py deleted file mode 100644 index 7528c91..0000000 --- a/ply/test/lex_state1.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state1.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = 'comment' - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state2.py b/ply/test/lex_state2.py deleted file mode 100644 index 3aef69e..0000000 --- a/ply/test/lex_state2.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = ('comment','example') - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state3.py b/ply/test/lex_state3.py deleted file mode 100644 index 616e484..0000000 --- a/ply/test/lex_state3.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state3.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = ((comment, 'inclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state4.py b/ply/test/lex_state4.py deleted file mode 100644 index 1825016..0000000 --- a/ply/test/lex_state4.py +++ /dev/null @@ -1,41 +0,0 @@ -# lex_state4.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - -states = (('comment', 'exclsive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state5.py b/ply/test/lex_state5.py deleted file mode 100644 index 4ce828e..0000000 --- a/ply/test/lex_state5.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state5.py -# -# Bad state declaration - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('comment', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_noerror.py b/ply/test/lex_state_noerror.py deleted file mode 100644 index 90bbea8..0000000 --- a/ply/test/lex_state_noerror.py +++ /dev/null @@ -1,39 +0,0 @@ -# lex_state_noerror.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_norule.py b/ply/test/lex_state_norule.py deleted file mode 100644 index 64ec6d3..0000000 --- a/ply/test/lex_state_norule.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state_norule.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_try.py b/ply/test/lex_state_try.py deleted file mode 100644 index fd5ba22..0000000 --- a/ply/test/lex_state_try.py +++ /dev/null @@ -1,45 +0,0 @@ -# lex_state_try.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = " \t" - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -t_comment_error = t_error -t_comment_ignore = t_ignore - -lex.lex() - -data = "3 + 4 /* This is a comment */ + 10" - -lex.runmain(data=data) diff --git a/ply/test/lex_token1.py b/ply/test/lex_token1.py deleted file mode 100644 index 6fca300..0000000 --- a/ply/test/lex_token1.py +++ /dev/null @@ -1,19 +0,0 @@ -# lex_token1.py -# -# Tests for absence of tokens variable - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token2.py b/ply/test/lex_token2.py deleted file mode 100644 index 6e65ab0..0000000 --- a/ply/test/lex_token2.py +++ /dev/null @@ -1,22 +0,0 @@ -# lex_token2.py -# -# Tests for tokens of wrong type - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = "PLUS MINUS NUMBER" - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_token3.py b/ply/test/lex_token3.py deleted file mode 100644 index 636452e..0000000 --- a/ply/test/lex_token3.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_token3.py -# -# tokens is right type, but is missing a token for one rule - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token4.py b/ply/test/lex_token4.py deleted file mode 100644 index 52947e9..0000000 --- a/ply/test/lex_token4.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_token4.py -# -# Bad token name - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "-", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token5.py b/ply/test/lex_token5.py deleted file mode 100644 index ef7a3c5..0000000 --- a/ply/test/lex_token5.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_token5.py -# -# Return a bad token name - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - t.type = "NUM" - return t - -def t_error(t): - pass - -lex.lex() -lex.input("1234") -t = lex.token() - - diff --git a/ply/test/lex_token_dup.py b/ply/test/lex_token_dup.py deleted file mode 100644 index 384f4e9..0000000 --- a/ply/test/lex_token_dup.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_token_dup.py -# -# Duplicate token name in tokens - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "MINUS" - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/testlex.py b/ply/test/testlex.py deleted file mode 100755 index 1f7dd1b..0000000 --- a/ply/test/testlex.py +++ /dev/null @@ -1,606 +0,0 @@ -# testlex.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import imp -import warnings - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.lex - -def make_pymodule_path(filename): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = os.path.splitext(file) - - if sys.hexversion >= 0x3020000: - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename): - return os.path.exists(make_pymodule_path(filename)) - -def pymodule_out_remove(filename): - os.remove(make_pymodule_path(filename)) - -def check_expected(result,expected): - if sys.version_info[0] >= 3: - if isinstance(result,str): - result = result.encode('ascii') - if isinstance(expected,str): - expected = expected.encode('ascii') - resultlines = result.splitlines() - expectedlines = expected.splitlines() - - - if len(resultlines) != len(expectedlines): - return False - - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building lexers -class LexErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore',category=ResourceWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_lex_doc1(self): - self.assertRaises(SyntaxError,run_import,"lex_doc1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_doc1.py:18: No regular expression defined for rule 't_NUMBER'\n")) - def test_lex_dup1(self): - self.assertRaises(SyntaxError,run_import,"lex_dup1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup2(self): - self.assertRaises(SyntaxError,run_import,"lex_dup2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup3(self): - self.assertRaises(SyntaxError,run_import,"lex_dup3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup3.py:20: Rule t_NUMBER redefined. 
Previously defined on line 18\n" )) - - def test_lex_empty(self): - self.assertRaises(SyntaxError,run_import,"lex_empty") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No rules of the form t_rulename are defined\n" - "No rules defined for state 'INITIAL'\n")) - - def test_lex_error1(self): - run_import("lex_error1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No t_error rule is defined\n")) - - def test_lex_error2(self): - self.assertRaises(SyntaxError,run_import,"lex_error2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Rule 't_error' must be defined as a function\n") - ) - - def test_lex_error3(self): - self.assertRaises(SyntaxError,run_import,"lex_error3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_error3.py:20: Rule 't_error' requires an argument\n")) - - def test_lex_error4(self): - self.assertRaises(SyntaxError,run_import,"lex_error4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_error4.py:20: Rule 't_error' has too many arguments\n")) - - def test_lex_ignore(self): - self.assertRaises(SyntaxError,run_import,"lex_ignore") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_ignore.py:20: Rule 't_ignore' must be defined as a string\n")) - - def test_lex_ignore2(self): - run_import("lex_ignore2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "t_ignore contains a literal backslash '\\'\n")) - - - def test_lex_re1(self): - self.assertRaises(SyntaxError,run_import,"lex_re1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n")) - - def test_lex_re2(self): - self.assertRaises(SyntaxError,run_import,"lex_re2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Regular expression for rule 't_PLUS' matches empty string\n")) - - def test_lex_re3(self): - self.assertRaises(SyntaxError,run_import,"lex_re3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" - "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) - - def test_lex_rule1(self): - self.assertRaises(SyntaxError,run_import,"lex_rule1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "t_NUMBER not defined as a function or string\n")) - - def test_lex_rule2(self): - self.assertRaises(SyntaxError,run_import,"lex_rule2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_rule2.py:18: Rule 't_NUMBER' requires an argument\n")) - - def test_lex_rule3(self): - self.assertRaises(SyntaxError,run_import,"lex_rule3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_rule3.py:18: Rule 't_NUMBER' has too many arguments\n")) - - - def test_lex_state1(self): - self.assertRaises(SyntaxError,run_import,"lex_state1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "states must be defined as a tuple or list\n")) - - def test_lex_state2(self): - self.assertRaises(SyntaxError,run_import,"lex_state2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n" - "Invalid state specifier 'example'. 
Must be a tuple (statename,'exclusive|inclusive')\n")) - - def test_lex_state3(self): - self.assertRaises(SyntaxError,run_import,"lex_state3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State name 1 must be a string\n" - "No rules defined for state 'example'\n")) - - def test_lex_state4(self): - self.assertRaises(SyntaxError,run_import,"lex_state4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State type for state comment must be 'inclusive' or 'exclusive'\n")) - - - def test_lex_state5(self): - self.assertRaises(SyntaxError,run_import,"lex_state5") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State 'comment' already defined\n")) - - def test_lex_state_noerror(self): - run_import("lex_state_noerror") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No error rule is defined for exclusive state 'comment'\n")) - - def test_lex_state_norule(self): - self.assertRaises(SyntaxError,run_import,"lex_state_norule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No rules defined for state 'example'\n")) - - def test_lex_token1(self): - self.assertRaises(SyntaxError,run_import,"lex_token1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No token list is defined\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token2(self): - self.assertRaises(SyntaxError,run_import,"lex_token2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "tokens must be a list or tuple\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token3(self): - self.assertRaises(SyntaxError,run_import,"lex_token3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Rule 't_MINUS' defined for an unspecified token MINUS\n")) - - - def test_lex_token4(self): - self.assertRaises(SyntaxError,run_import,"lex_token4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Bad token name '-'\n")) - - - def test_lex_token5(self): - try: - run_import("lex_token5") - except ply.lex.LexError: - e = sys.exc_info()[1] - self.assert_(check_expected(str(e),"lex_token5.py:19: Rule 't_NUMBER' returned an unknown token type 'NUM'")) - - def test_lex_token_dup(self): - run_import("lex_token_dup") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Token 'MINUS' multiply defined\n")) - - - def test_lex_literal1(self): - self.assertRaises(SyntaxError,run_import,"lex_literal1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid literal '**'. Must be a single character\n")) - - def test_lex_literal2(self): - self.assertRaises(SyntaxError,run_import,"lex_literal2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid literals specification. 
literals must be a sequence of characters\n")) - -import os -import subprocess -import shutil - -# Tests related to various build options associated with lexers -class LexBuildOptionTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - try: - shutil.rmtree("lexdir") - except OSError: - pass - - def test_lex_module(self): - run_import("lex_module") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_object(self): - run_import("lex_object") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_closure(self): - run_import("lex_closure") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - def test_lex_optimize(self): - try: - os.remove("lextab.py") - except OSError: - pass - try: - os.remove("lextab.pyc") - except OSError: - pass - try: - os.remove("lextab.pyo") - except OSError: - pass - run_import("lex_optimize") - - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.py")) - - - p = subprocess.Popen([sys.executable,'-O','lex_optimize.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lextab.pyo")) - - pymodule_out_remove("lextab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lextab.pyo")) - try: - os.remove("lextab.py") - except OSError: - pass - try: - pymodule_out_remove("lextab.pyc") - except OSError: - pass - try: - pymodule_out_remove("lextab.pyo") - except OSError: - pass - - def test_lex_optimize2(self): - try: - os.remove("opt2tab.py") - except OSError: - pass - try: - os.remove("opt2tab.pyc") - except OSError: - pass - try: - os.remove("opt2tab.pyo") - except OSError: - pass - run_import("lex_optimize2") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_optimize2.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("opt2tab.pyo")) - pymodule_out_remove("opt2tab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize2.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("opt2tab.pyo")) - try: - os.remove("opt2tab.py") - except OSError: - pass - try: - pymodule_out_remove("opt2tab.pyc") - except OSError: - pass - try: - pymodule_out_remove("opt2tab.pyo") - except OSError: - pass - - def test_lex_optimize3(self): - try: - shutil.rmtree("lexdir") - except OSError: - pass - - os.mkdir("lexdir") - os.mkdir("lexdir/sub") 
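        # The empty __init__.py files written just below make lexdir and
        # lexdir/sub importable as packages, which presumably lets the table
        # generated with lextab="lexdir.sub.calctab" be imported back later.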
- open("lexdir/__init__.py","w").write("") - open("lexdir/sub/__init__.py","w").write("") - run_import("lex_optimize3") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_optimize3.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo")) - pymodule_out_remove("lexdir/sub/calctab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize3.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo")) - try: - shutil.rmtree("lexdir") - except OSError: - pass - - def test_lex_opt_alias(self): - try: - os.remove("aliastab.py") - except OSError: - pass - try: - os.remove("aliastab.pyc") - except OSError: - pass - try: - os.remove("aliastab.pyo") - except OSError: - pass - run_import("lex_opt_alias") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_opt_alias.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("aliastab.pyo")) - pymodule_out_remove("aliastab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_opt_alias.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("aliastab.pyo")) - try: - os.remove("aliastab.py") - except OSError: - pass - try: - pymodule_out_remove("aliastab.pyc") - except OSError: - pass - try: - pymodule_out_remove("aliastab.pyo") - except OSError: - pass - - def test_lex_many_tokens(self): - try: - os.remove("manytab.py") - except OSError: - pass - try: - os.remove("manytab.pyc") - except OSError: - pass - try: - os.remove("manytab.pyo") - except OSError: - pass - run_import("lex_many_tokens") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) - - self.assert_(os.path.exists("manytab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) - - self.assert_(pymodule_out_exists("manytab.pyo")) - pymodule_out_remove("manytab.pyo") - try: - os.remove("manytab.py") - except OSError: - pass - try: - os.remove("manytab.pyc") - except OSError: - pass - try: - os.remove("manytab.pyo") - except OSError: - pass - -# Tests related to run-time behavior of lexers -class LexRunTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout 
= StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - - def test_lex_hedit(self): - run_import("lex_hedit") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(H_EDIT_DESCRIPTOR,'abc',1,0)\n" - "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n" - "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")) - - def test_lex_state_try(self): - run_import("lex_state_try") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,'3',1,0)\n" - "(PLUS,'+',1,2)\n" - "(NUMBER,'4',1,4)\n" - "Entering comment state\n" - "comment body LexToken(body_part,'This is a comment */',1,9)\n" - "(PLUS,'+',1,30)\n" - "(NUMBER,'10',1,32)\n" - )) - - - -unittest.main() diff --git a/ply/test/testyacc.py b/ply/test/testyacc.py deleted file mode 100644 index 2b06b44..0000000 --- a/ply/test/testyacc.py +++ /dev/null @@ -1,347 +0,0 @@ -# testyacc.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import warnings - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.yacc -import imp - -def make_pymodule_path(filename): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = os.path.splitext(file) - - if sys.hexversion >= 0x3020000: - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename): - return os.path.exists(make_pymodule_path(filename)) - -def pymodule_out_remove(filename): - os.remove(make_pymodule_path(filename)) - - -def check_expected(result,expected): - resultlines = [] - for line in result.splitlines(): - if line.startswith("WARNING: "): - line = line[9:] - elif line.startswith("ERROR: "): - line = line[7:] - resultlines.append(line) - - expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): - return False - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building parsers -class YaccErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - try: - os.remove("parsetab.py") - pymodule_out_remove("parsetab.pyc") - except OSError: - pass - - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore',category=ResourceWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_yacc_badargs(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badargs") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badargs.py:23: Rule 'p_statement_assign' has too many arguments\n" - "yacc_badargs.py:27: Rule 'p_statement_expr' requires an argument\n" - )) - def test_yacc_badid(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badid") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badid.py:32: Illegal name 'bad&rule' in rule 'statement'\n" - "yacc_badid.py:36: Illegal rule name 'bad&rule'\n" - )) - - def test_yacc_badprec(self): - try: - run_import("yacc_badprec") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "precedence must be a list or tuple\n" - )) - def test_yacc_badprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badprec2") - result = 
sys.stderr.getvalue() - self.assert_(check_expected(result, - "Bad precedence table\n" - )) - - def test_yacc_badprec3(self): - run_import("yacc_badprec3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Precedence already specified for terminal 'MINUS'\n" - "Generating LALR tables\n" - - )) - - def test_yacc_badrule(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badrule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badrule.py:24: Syntax error. Expected ':'\n" - "yacc_badrule.py:28: Syntax error in rule 'statement'\n" - "yacc_badrule.py:33: Syntax error. Expected ':'\n" - "yacc_badrule.py:42: Syntax error. Expected ':'\n" - )) - - def test_yacc_badtok(self): - try: - run_import("yacc_badtok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "tokens must be a list or tuple\n")) - - def test_yacc_dup(self): - run_import("yacc_dup") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_dup.py:27: Function p_statement redefined. Previously defined on line 23\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - - )) - def test_yacc_error1(self): - try: - run_import("yacc_error1") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error1.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error2(self): - try: - run_import("yacc_error2") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error2.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error3(self): - try: - run_import("yacc_error3") - except ply.yacc.YaccError: - e = sys.exc_info()[1] - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "'p_error' defined, but is not a function or method\n")) - - def test_yacc_error4(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_error4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error4.py:62: Illegal rule name 'error'. 
Already defined as a token\n" - )) - - def test_yacc_inf(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Token 'NUMBER' defined, but not used\n" - "There is 1 unused token\n" - "Infinite recursion detected for symbol 'statement'\n" - "Infinite recursion detected for symbol 'expression'\n" - )) - def test_yacc_literal(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_literal") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_literal.py:36: Literal token '**' in rule 'expression' may only be a single character\n" - )) - def test_yacc_misplaced(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_misplaced") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_misplaced.py:32: Misplaced '|'\n" - )) - - def test_yacc_missing1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_missing1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_missing1.py:24: Symbol 'location' used, but not defined as a token or a rule\n" - )) - - def test_yacc_nested(self): - run_import("yacc_nested") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "A\n" - "A\n" - "A\n", - )) - - def test_yacc_nodoc(self): - run_import("yacc_nodoc") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_nodoc.py:27: No documentation string specified in function 'p_statement_expr' (ignored)\n" - "Generating LALR tables\n" - )) - - def test_yacc_noerror(self): - run_import("yacc_noerror") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - )) - - def test_yacc_nop(self): - run_import("yacc_nop") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_nop.py:27: Possible grammar rule 'statement_expr' defined without p_ prefix\n" - "Generating LALR tables\n" - )) - - def test_yacc_notfunc(self): - run_import("yacc_notfunc") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "'p_statement_assign' not defined as a function\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - )) - def test_yacc_notok(self): - try: - run_import("yacc_notok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No token list is defined\n")) - - def test_yacc_rr(self): - run_import("yacc_rr") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - "1 reduce/reduce conflict\n" - "reduce/reduce conflict in state 15 resolved using rule (statement -> NAME EQUALS NUMBER)\n" - "rejected rule (expression -> NUMBER) in state 15\n" - - )) - - def test_yacc_rr_unused(self): - run_import("yacc_rr_unused") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - "3 reduce/reduce conflicts\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule4 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule4 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "Rule (rule5 -> A) is never reduced\n" - )) - - def test_yacc_simple(self): - run_import("yacc_simple") - result = 
sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - )) - def test_yacc_sr(self): - run_import("yacc_sr") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - "20 shift/reduce conflicts\n" - )) - - def test_yacc_term1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_term1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_term1.py:24: Illegal rule name 'NUMBER'. Already defined as a token\n" - )) - - def test_yacc_unused(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_unused.py:62: Symbol 'COMMA' used, but not defined as a token or a rule\n" - "Symbol 'COMMA' is unreachable\n" - "Symbol 'exprlist' is unreachable\n" - )) - def test_yacc_unused_rule(self): - run_import("yacc_unused_rule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_unused_rule.py:62: Rule 'integer' defined, but not used\n" - "There is 1 unused rule\n" - "Symbol 'integer' is unreachable\n" - "Generating LALR tables\n" - )) - - def test_yacc_uprec(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_uprec.py:37: Nothing known about the precedence of 'UMINUS'\n" - )) - - def test_yacc_uprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_uprec2.py:37: Syntax error. Nothing follows %prec\n" - )) - - def test_yacc_prec1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_prec1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Precedence rule 'left' defined for unknown symbol '+'\n" - "Precedence rule 'left' defined for unknown symbol '*'\n" - "Precedence rule 'left' defined for unknown symbol '-'\n" - "Precedence rule 'left' defined for unknown symbol '/'\n" - )) - - - -unittest.main() diff --git a/ply/test/yacc_badargs.py b/ply/test/yacc_badargs.py deleted file mode 100644 index 9a1d03f..0000000 --- a/ply/test/yacc_badargs.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badargs.py -# -# Rules with wrong # args -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t,s): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - 
except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badid.py b/ply/test/yacc_badid.py deleted file mode 100644 index e4b9f5e..0000000 --- a/ply/test/yacc_badid.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badid.py -# -# Attempt to define a rule with a bad-identifier name -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_statement_expr2(t): - 'statement : bad&rule' - pass - -def p_badrule(t): - 'bad&rule : expression' - pass - - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec.py b/ply/test/yacc_badprec.py deleted file mode 100644 index 3013bb6..0000000 --- a/ply/test/yacc_badprec.py +++ /dev/null @@ -1,64 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec.py -# -# Bad precedence specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = "blah" - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec2.py b/ply/test/yacc_badprec2.py deleted file mode 100644 index 83093b4..0000000 --- a/ply/test/yacc_badprec2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec2.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - 42, - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec3.py b/ply/test/yacc_badprec3.py deleted file mode 100644 index d925ecd..0000000 --- a/ply/test/yacc_badprec3.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec3.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE','MINUS'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badrule.py b/ply/test/yacc_badrule.py deleted file mode 100644 index 92af646..0000000 --- a/ply/test/yacc_badrule.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badrule.py -# -# Syntax problems in the rule strings -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression: MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badtok.py b/ply/test/yacc_badtok.py deleted file mode 100644 index fc4afe1..0000000 --- a/ply/test/yacc_badtok.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badtok.py -# -# A grammar, but tokens is a bad datatype -# ----------------------------------------------------------------------------- - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = "Hello" - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_dup.py b/ply/test/yacc_dup.py deleted file mode 100644 index 309ba32..0000000 --- a/ply/test/yacc_dup.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_dup.py -# -# Duplicated rule name -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error1.py b/ply/test/yacc_error1.py deleted file mode 100644 index 10ac6a9..0000000 --- a/ply/test/yacc_error1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t,s): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error2.py b/ply/test/yacc_error2.py deleted file mode 100644 index 7591418..0000000 --- a/ply/test/yacc_error2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error2.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error3.py b/ply/test/yacc_error3.py deleted file mode 100644 index 4604a48..0000000 --- a/ply/test/yacc_error3.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error3.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -p_error = "blah" - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error4.py b/ply/test/yacc_error4.py deleted file mode 100644 index 9c550cd..0000000 --- a/ply/test/yacc_error4.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error4.py -# -# Attempt to define a rule named 'error' -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error_handler(t): - 'error : NAME' - pass - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/ply/test/yacc_inf.py b/ply/test/yacc_inf.py deleted file mode 100644 index efd3612..0000000 --- a/ply/test/yacc_inf.py +++ /dev/null @@ -1,56 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_inf.py -# -# Infinite recursion -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_literal.py b/ply/test/yacc_literal.py deleted file mode 100644 index 0d62803..0000000 --- a/ply/test/yacc_literal.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_literal.py -# -# Grammar with bad literal characters -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression - | expression '**' expression ''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_misplaced.py b/ply/test/yacc_misplaced.py deleted file mode 100644 index 9159b01..0000000 --- a/ply/test/yacc_misplaced.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_misplaced.py -# -# A misplaced | in grammar rules -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - ''' | expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_missing1.py b/ply/test/yacc_missing1.py deleted file mode 100644 index d1b5105..0000000 --- a/ply/test/yacc_missing1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_missing1.py -# -# Grammar with a missing rule -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : location EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_nested.py b/ply/test/yacc_nested.py deleted file mode 100644 index a1b061e..0000000 --- a/ply/test/yacc_nested.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") - -from ply import lex, yacc - -t_A = 'A' -t_B = 'B' -t_C = 'C' - -tokens = ('A', 'B', 'C') - -the_lexer = lex.lex() - -def t_error(t): - pass - -def p_error(p): - pass - -def p_start(t): - '''start : A nest C''' - pass - -def p_nest(t): - '''nest : B''' - print(t[-1]) - -the_parser = yacc.yacc(debug = False, write_tables = False) - -the_parser.parse('ABC', the_lexer) -the_parser.parse('ABC', the_lexer, tracking=True) -the_parser.parse('ABC', the_lexer, tracking=True, debug=1) diff --git a/ply/test/yacc_nodoc.py b/ply/test/yacc_nodoc.py deleted file mode 100644 index 0f61920..0000000 --- a/ply/test/yacc_nodoc.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nodoc.py -# -# Rule with a missing doc-string -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_noerror.py b/ply/test/yacc_noerror.py deleted file mode 100644 index b38c758..0000000 --- a/ply/test/yacc_noerror.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_noerror.py -# -# No p_error() rule defined. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - - -yacc.yacc() - - - - diff --git a/ply/test/yacc_nop.py b/ply/test/yacc_nop.py deleted file mode 100644 index 789a9cf..0000000 --- a/ply/test/yacc_nop.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nop.py -# -# Possible grammar rule defined without p_ prefix -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_notfunc.py b/ply/test/yacc_notfunc.py deleted file mode 100644 index 5093a74..0000000 --- a/ply/test/yacc_notfunc.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notfunc.py -# -# p_rule not defined as a function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -p_statement_assign = "Blah" - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_notok.py b/ply/test/yacc_notok.py deleted file mode 100644 index cff55a8..0000000 --- a/ply/test/yacc_notok.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notok.py -# -# A grammar, but we forgot to import the tokens list -# ----------------------------------------------------------------------------- - -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_prec1.py b/ply/test/yacc_prec1.py deleted file mode 100644 index 2ca6afc..0000000 --- a/ply/test/yacc_prec1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_prec1.py -# -# Tests case where precedence specifier doesn't match up to terminals -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_rr.py b/ply/test/yacc_rr.py deleted file mode 100644 index e7336c2..0000000 --- a/ply/test/yacc_rr.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr.py -# -# A grammar with a reduce/reduce conflict -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_assign_2(t): - 'statement : NAME EQUALS NUMBER' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_rr_unused.py b/ply/test/yacc_rr_unused.py deleted file mode 100644 index 1ca5f7e..0000000 --- a/ply/test/yacc_rr_unused.py +++ /dev/null @@ -1,30 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr_unused.py -# -# A grammar with reduce/reduce conflicts and a rule that never -# gets reduced. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = ('A', 'B', 'C') - -def p_grammar(p): - ''' - rule1 : rule2 B - | rule2 C - - rule2 : rule3 B - | rule4 - | rule5 - - rule3 : A - - rule4 : A - - rule5 : A - ''' - -yacc.yacc() diff --git a/ply/test/yacc_simple.py b/ply/test/yacc_simple.py deleted file mode 100644 index bd989f4..0000000 --- a/ply/test/yacc_simple.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_simple.py -# -# A simple, properly specifier grammar -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_sr.py b/ply/test/yacc_sr.py deleted file mode 100644 index 69a1e9c..0000000 --- a/ply/test/yacc_sr.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_sr.py -# -# A grammar with shift-reduce conflicts -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_term1.py b/ply/test/yacc_term1.py deleted file mode 100644 index eaa36e9..0000000 --- a/ply/test/yacc_term1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_term1.py -# -# Terminal used on the left-hand-side -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'NUMBER : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_unused.py b/ply/test/yacc_unused.py deleted file mode 100644 index 55b677b..0000000 --- a/ply/test/yacc_unused.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused.py -# -# A grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_expr_list(t): - 'exprlist : exprlist COMMA expression' - pass - -def p_expr_list_2(t): - 'exprlist : expression' - pass - - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_unused_rule.py b/ply/test/yacc_unused_rule.py deleted file mode 100644 index 4868ef8..0000000 --- a/ply/test/yacc_unused_rule.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused_rule.py -# -# Grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_integer(t): - 'integer : NUMBER' - t[0] = t[1] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_uprec.py b/ply/test/yacc_uprec.py deleted file mode 100644 index 569adb8..0000000 --- a/ply/test/yacc_uprec.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_uprec2.py b/ply/test/yacc_uprec2.py deleted file mode 100644 index 73274bf..0000000 --- a/ply/test/yacc_uprec2.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec2.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/theme/base.css b/theme/base.css deleted file mode 100644 index 22c4ee2..0000000 --- a/theme/base.css +++ /dev/null @@ -1,194 +0,0 @@ - -html { - display: block; -} - -body { - font-family: 'Ubuntu',Tahoma,sans-serif; - padding-top: 40px; - padding-bottom: 40px; - font-size: 15px; - line-height: 150%; - margin: 0; - color: #333333; - background-color: #ffffff; - display: block; - margin-left: 250px; - margin-right: 50px; -}; - -.container{ - width:940px; - margin-right: auto; - margin-left: auto; - display: block; -}; - -.navbar { - z-index: 1; - overflow: visible; - color: #ffffff; - display: block; -} - -.navbar div { - display: block; - margin-left: 5px; - margin-right: 5px; -} - -.navbar-fixed-top { - width:210px; - display: block; - position: fixed; - padding-top: 0px; - top: 0; - height: 100%; - right: 0; - left: 0; - margin-bottom: 0; - background-color: #d44413; 
- border: 1px solid #c64012; - font-size: 15px; - font-weight: 200; - color: #ffffff; - text-shadow: 0 1px 0 #ce4213; - padding: 10px 20px 10px; - margin-left: -20px; - //overflow:scroll; - //overflow-x:hidden; -} -/* -.navbar ul { - font-size: 15px; -}; -*/ -h1, h2, h3, h4, h5, h6 { - display: block; - margin: 10px 0; - font-family: inherit; - font-weight: bold; - line-height: 1; - color: inherit; - text-rendering: optimizelegibility; -} - -p { - margin: 0 0 10px; - display: block; -} - -pre { - #margin-left: 20px; - display: block; - padding: 9.5px; - margin: 0 0 10px; - font-size: 13px; - line-height: 20px; - word-break: break-all; - word-wrap: break-word; - white-space: pre; - white-space: pre-wrap; - background-color: #f5f5f5; - border: 1px solid #ccc; - border: 1px solid rgba(0, 0, 0, 0.15); - border-radius: 4px; -} - - - -.code-function { - text-decoration:none; - color:#09857e; - font-weight:bold; -} - -.code-type { - text-decoration:none; - color:#376d0a; - font-weight:bold; -} - -.code-argument { - text-decoration:none; - color:#B80000; - font-weight:bold; -} - -.code-number { - text-decoration:none; - color:#007b00; -} - -.code-keyword { - text-decoration:none; - color:#215eb8; - font-weight:bold; -} -.code-storage-keyword { - text-decoration:none; - color:#466cb4; -} - -.code-doxygen { - text-decoration:none; - color:#bf3e00; - font-weight:bold; -} - -.code-comment { - text-decoration:none; - color:#b704b5; -} - -.code-preproc { - text-decoration:none; - color:#ac0000; -} - -.code-text-quote { - text-decoration:none; - color:#008e00; -} -.code-number { - text-decoration:none; - color:#007b00; -} -.code-member { - text-decoration:none; - color:#7c5406; -} -.code-input-function { - text-decoration:none; - color:#B80000; - font-weight:bold; -} -.code-function-name { - text-decoration:none; - color:#09857e; - font-weight:bold; -} -.code-function-system { - text-decoration:none; - color:#acaa00; -} -.code-generic-define { - text-decoration:none; - color:#3c850b; -} -.code-macro { - text-decoration:none; - color:#3c850b; -} -.code-operator { - text-decoration:none; - color:#1633a3; -} -.code-keyword { - text-decoration:none; - color:#466cb4; -} -.code-class { - text-decoration:none; - color:#006cb4; -} diff --git a/theme/menu.css b/theme/menu.css deleted file mode 100644 index 26ed389..0000000 --- a/theme/menu.css +++ /dev/null @@ -1,146 +0,0 @@ -/* CSS Document */ - -/*----------------MENU-----------------*/ -div#menu div{ - margin-top: 0px; - background: #6699FF; -} -/* permet l'affichage du haut du menu*/ -div#menu h2{ - color: #000000; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; - text-align:left; - margin: 0; - padding: 3px; - padding-left: 6px; - background: #1a62db; -} -div#menu h3{ - margin: 0; - padding: 6px; - background: #6699FF; -} - -div#menu a{ - color: #000000; - bgcolor=#6699FF; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; -} -div#menu li { - position: relative; - list-style:none; - margin:0px; - border-bottom: 1px solid #0008ab; -} -div#menu li.sousmenu { - background: url(sous_menu.gif) 95% 50% no-repeat; -} -div#menu li:hover { - background: #0008ab; -} -div#menu li.sousmenu:hover { - background: #0008ab; -} -div#menu ul ul { - position: absolute; - top: 0px; -} - -/*TAILLE PREMIERE COLONNE*/ -div#menu { - float: center; - width: 200px; - text-align:left; -} -div#menu ul { - margin: 0; - padding: 0; - width: 200px; - background: #6699FF; - border: 0px solid; -} -div#menu ul ul { - left: 199px; - display:none; - background: #FFFFFF; -} -div#menu li a { - display: block; - 
padding: 2px 0px 2px 4px; - text-decoration: none; - width: 191px; - border-left: 3px solid #6699FF; -} -div#menu form { - border-left: 8px solid #6699FF; - background: #6699FF; - FONT-FAMILY: Arial; - margin:0px; - FONT-SIZE: 8pt; -} -div#menu texte { - border-left: 8px solid #6699FF; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; - font-weight:bold; - border-bottom: 1px solid #6699FF; -} - -/*TAILLE DEUXIEME COLONE*/ - -div#menu ul.niveau1 ul { - left: 200px; - height: 500px; - border: 1px solid #0008ab; - background: #1a62db; - /* - overflow:scroll; - overflow-y:auto; - overflow-x:hidden; - */ -} -div#menu ul.niveau1 li { - background: #6699FF; -} -div#menu ul.niveau1 li.sousmenu:hover ul.niveau2 { - width:219px; - display:block; -} - - - -/*TAILLE TROISIEME COLONNE*/ -div#menu ul.niveau2 ul { - left: 219px; - height: 500px; -} -div#menu ul.niveau2 li a { - width: 200px; -} -div#menu ul.niveau2 li.sousmenu:hover ul.niveau3 { - width:10em; - display:block; -} - -/*TAILLE Quatrieme COLONNE*/ -div#menu ul.niveau3 ul { - left: 369px; - height: 500px; -} -div#menu ul.niveau3 li a { - width: 200px; -} -div#menu ul.niveau3 li.sousmenu:hover ul.niveau4 { - width:10em; - display:block; -} -/*TAILLE DEUXIEME COLONE BIS????*/ - -/*COULEUR DES BORDURES*/ -div#menu li a:hover { - border-left-color: #000ADE; - background: #6699FF; - font-weight:bold; -} \ No newline at end of file