diff --git a/codeBB/BB_Code.py b/codeBB/BB_Code.py deleted file mode 100644 index 8c0d337..0000000 --- a/codeBB/BB_Code.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import codeHL -import re - - -## -## @brief Transcode balise : -## [code language=cpp] -## int main(void) { -## return 0; -## } -## [/code] -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - #value = re.sub(r'\[code(( |\t|\n|\r)+style=(.*))?\](.*?)\[/code\]', - value = re.sub(r'\[code(( |\t|\n|\r)+style=(.*?))?\](.*?)\[/code\]', - replace_code, #r'
\4', - value, - flags=re.DOTALL) - - # TODO : remove the basic indentation of the element (to have a better display in the text tutorial ... - return value - - - -def replace_code(match): - if match.group() == "": - return "" - #debug.info("plop: " + str(match.groups())) - value = codeHL.transcode(match.groups()[2], match.groups()[3]) - return '
' + value + '' - diff --git a/codeBB/BB_Image.py b/codeBB/BB_Image.py deleted file mode 100644 index 7428cfb..0000000 --- a/codeBB/BB_Image.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode balise: -## [img w=125 h=45]dossier/image.jpg[/img] -## [img w=125 h=45]http://plop.com/dossier/image.png[/img] -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - return value - - diff --git a/codeBB/BB_IndentAndDot.py b/codeBB/BB_IndentAndDot.py deleted file mode 100644 index a4de40b..0000000 --- a/codeBB/BB_IndentAndDot.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python -import lutinDebug as debug -import sys -import lutinTools -import re - - -## -## @brief Transcode -## commencez les ligne par ":" comme: -## : 1 -## : 2 -## ::2.1 -## ::2.2 -## :::2.2.1 -## ::::2.2.1.1 -## :::::2.2.1.1.1 -## ::2.3 -## :3 -## resultat: -## -## 1 -## 2 -## 2.1 -## 2.2 -## 2.2.1 -## 2.2.1.1 -## 2.3 -## 3 -## -## note: lorsque vous sautez une ligne, la liste sarraite et en recommence une autre... -## -## Il est possible de mettre des ":" sans ligne appres ce qui genere une ligne vide.. -## -## AND DOT -## **Ma ligne2 star consecutives engendrent des points quelque soit la position dans la ligne... -## -## Resultat: -## -## * premiere ligne -## * deusieme ligne -## @param[in] value String to transform. -## @return Transformed string. -## -def transcode(value): - - value = re.sub(r'\n:', - r'\n:INDENT:', - value) - p = re.compile('((\:INDENT\:(.*?)\n)*)', - flags=re.DOTALL) - value = p.sub(replace_wiki_identation, - value) - - value = re.sub(r'\*\*(.*?)\n', - r'
\1 |
\n"); - if function['destructor'] : - file.write("~") - lenght = 1; - elif function['constructor'] : - lenght = 0; - else : - typeData, typeLen = display_type(function["rtnType"], myDoc); - file.write(typeData + " ") - lenght = typeLen+1; - - parameterPos = lenght + len(function["name"]) + 1; - file.write("" + function["name"] + "(") - isFirst = True - for param in function["parameters"]: - if isFirst == False: - file.write(",\n") - file.write(white_space(parameterPos)) - typeData, typeLen = display_type(param["type"], myDoc); - file.write(typeData) - if param['name'] != "": - file.write(" ") - file.write("" + param['name'] + "") - isFirst = False - file.write(");") - file.write("\n"); - file.write("
\n"); - file.write("\n"); - file.write("
\n"); - file.write("\n"); - file.write("
\n"); - for function in localClass["methods"]["public"]: - if function['destructor'] \ - or function['constructor'] : - display_reduct_function(function, file, "+ ", sizeReturn, sizefunction, myDoc) - for function in localClass["methods"]["protected"]: - if function['destructor'] \ - or function['constructor'] : - display_reduct_function(function, file, "# ", sizeReturn, sizefunction, myDoc) - for function in localClass["methods"]["private"]: - if function['destructor'] \ - or function['constructor'] : - display_reduct_function(function, file, "- ", sizeReturn, sizefunction, myDoc) - - file.write("\n"); - - file.write("
\n"); - for function in localClass["methods"]["public"]: - if not function['destructor'] \ - and not function['constructor'] : - display_reduct_function(function, file, "+ ", sizeReturn, sizefunction, myDoc) - for function in localClass["methods"]["protected"]: - if not function['destructor'] \ - and not function['constructor'] : - display_reduct_function(function, file, "# ", sizeReturn, sizefunction, myDoc) - for function in localClass["methods"]["private"]: - if not function['destructor'] \ - and not function['constructor'] : - display_reduct_function(function, file, "- ", sizeReturn, sizefunction, myDoc) - file.write("\n"); - file.write("\n") - file.write("\n") - - - heritage = myDoc.get_heritage_list(className) - heritageDown = myDoc.get_down_heritage_list(className) - if len(heritage) > 1 \ - or len(heritageDown) > 0: - file.write("
\n") - level = 0; - for heritedClass in heritage: - if level != 0: - file.write(white_space(level*4) + "+--> ") - if heritedClass != className: - name, link = myDoc.get_class_link(heritedClass) - file.write("" + name + "\n") - else: - file.write("" + heritedClass + "\n") - level += 1; - for heritedClass in heritageDown: - file.write(white_space(level*4) + "+--> ") - name, link = myDoc.get_class_link(heritedClass) - file.write("" + name + "\n") - file.write("\n") - file.write("
['"]).*?(?P=quote)''' - - Patch submitted by Adam Ring. - -09/28/06: beazley - LALR(1) is now the default parsing method. To use SLR, use - yacc.yacc(method="SLR"). Note: there is no performance impact - on parsing when using LALR(1) instead of SLR. However, constructing - the parsing tables will take a little longer. - -09/26/06: beazley - Change to line number tracking. To modify line numbers, modify - the line number of the lexer itself. For example: - - def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - - This modification is both cleanup and a performance optimization. - In past versions, lex was monitoring every token for changes in - the line number. This extra processing is unnecessary for a vast - majority of tokens. Thus, this new approach cleans it up a bit. - - *** POTENTIAL INCOMPATIBILITY *** - You will need to change code in your lexer that updates the line - number. For example, "t.lineno += 1" becomes "t.lexer.lineno += 1" - -09/26/06: beazley - Added the lexing position to tokens as an attribute lexpos. This - is the raw index into the input text at which a token appears. - This information can be used to compute column numbers and other - details (e.g., scan backwards from lexpos to the first newline - to get a column position). - -09/25/06: beazley - Changed the name of the __copy__() method on the Lexer class - to clone(). This is used to clone a Lexer object (e.g., if - you're running different lexers at the same time). - -09/21/06: beazley - Limitations related to the use of the re module have been eliminated. - Several users reported problems with regular expressions exceeding - more than 100 named groups. To solve this, lex.py is now capable - of automatically splitting its master regular regular expression into - smaller expressions as needed. This should, in theory, make it - possible to specify an arbitrarily large number of tokens. - -09/21/06: beazley - Improved error checking in lex.py. Rules that match the empty string - are now rejected (otherwise they cause the lexer to enter an infinite - loop). An extra check for rules containing '#' has also been added. - Since lex compiles regular expressions in verbose mode, '#' is interpreted - as a regex comment, it is critical to use '\#' instead. - -09/18/06: beazley - Added a @TOKEN decorator function to lex.py that can be used to - define token rules where the documentation string might be computed - in some way. - - digit = r'([0-9])' - nondigit = r'([_A-Za-z])' - identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)' - - from ply.lex import TOKEN - - @TOKEN(identifier) - def t_ID(t): - # Do whatever - - The @TOKEN decorator merely sets the documentation string of the - associated token function as needed for lex to work. - - Note: An alternative solution is the following: - - def t_ID(t): - # Do whatever - - t_ID.__doc__ = identifier - - Note: Decorators require the use of Python 2.4 or later. If compatibility - with old versions is needed, use the latter solution. - - The need for this feature was suggested by Cem Karan. - -09/14/06: beazley - Support for single-character literal tokens has been added to yacc. - These literals must be enclosed in quotes. For example: - - def p_expr(p): - "expr : expr '+' expr" - ... - - def p_expr(p): - 'expr : expr "-" expr' - ... - - In addition to this, it is necessary to tell the lexer module about - literal characters. This is done by defining the variable 'literals' - as a list of characters. 
This should be defined in the module that - invokes the lex.lex() function. For example: - - literals = ['+','-','*','/','(',')','='] - - or simply - - literals = '+=*/()=' - - It is important to note that literals can only be a single character. - When the lexer fails to match a token using its normal regular expression - rules, it will check the current character against the literal list. - If found, it will be returned with a token type set to match the literal - character. Otherwise, an illegal character will be signalled. - - -09/14/06: beazley - Modified PLY to install itself as a proper Python package called 'ply'. - This will make it a little more friendly to other modules. This - changes the usage of PLY only slightly. Just do this to import the - modules - - import ply.lex as lex - import ply.yacc as yacc - - Alternatively, you can do this: - - from ply import * - - Which imports both the lex and yacc modules. - Change suggested by Lee June. - -09/13/06: beazley - Changed the handling of negative indices when used in production rules. - A negative production index now accesses already parsed symbols on the - parsing stack. For example, - - def p_foo(p): - "foo: A B C D" - print p[1] # Value of 'A' symbol - print p[2] # Value of 'B' symbol - print p[-1] # Value of whatever symbol appears before A - # on the parsing stack. - - p[0] = some_val # Sets the value of the 'foo' grammer symbol - - This behavior makes it easier to work with embedded actions within the - parsing rules. For example, in C-yacc, it is possible to write code like - this: - - bar: A { printf("seen an A = %d\n", $1); } B { do_stuff; } - - In this example, the printf() code executes immediately after A has been - parsed. Within the embedded action code, $1 refers to the A symbol on - the stack. - - To perform this equivalent action in PLY, you need to write a pair - of rules like this: - - def p_bar(p): - "bar : A seen_A B" - do_stuff - - def p_seen_A(p): - "seen_A :" - print "seen an A =", p[-1] - - The second rule "seen_A" is merely a empty production which should be - reduced as soon as A is parsed in the "bar" rule above. The use - of the negative index p[-1] is used to access whatever symbol appeared - before the seen_A symbol. - - This feature also makes it possible to support inherited attributes. - For example: - - def p_decl(p): - "decl : scope name" - - def p_scope(p): - """scope : GLOBAL - | LOCAL""" - p[0] = p[1] - - def p_name(p): - "name : ID" - if p[-1] == "GLOBAL": - # ... - else if p[-1] == "LOCAL": - #... - - In this case, the name rule is inheriting an attribute from the - scope declaration that precedes it. - - *** POTENTIAL INCOMPATIBILITY *** - If you are currently using negative indices within existing grammar rules, - your code will break. This should be extremely rare if non-existent in - most cases. The argument to various grammar rules is not usually not - processed in the same way as a list of items. - -Version 2.0 ------------------------------- -09/07/06: beazley - Major cleanup and refactoring of the LR table generation code. Both SLR - and LALR(1) table generation is now performed by the same code base with - only minor extensions for extra LALR(1) processing. - -09/07/06: beazley - Completely reimplemented the entire LALR(1) parsing engine to use the - DeRemer and Pennello algorithm for calculating lookahead sets. This - significantly improves the performance of generating LALR(1) tables - and has the added feature of actually working correctly! 
If you - experienced weird behavior with LALR(1) in prior releases, this should - hopefully resolve all of those problems. Many thanks to - Andrew Waters and Markus Schoepflin for submitting bug reports - and helping me test out the revised LALR(1) support. - -Version 1.8 ------------------------------- -08/02/06: beazley - Fixed a problem related to the handling of default actions in LALR(1) - parsing. If you experienced subtle and/or bizarre behavior when trying - to use the LALR(1) engine, this may correct those problems. Patch - contributed by Russ Cox. Note: This patch has been superceded by - revisions for LALR(1) parsing in Ply-2.0. - -08/02/06: beazley - Added support for slicing of productions in yacc. - Patch contributed by Patrick Mezard. - -Version 1.7 ------------------------------- -03/02/06: beazley - Fixed infinite recursion problem ReduceToTerminals() function that - would sometimes come up in LALR(1) table generation. Reported by - Markus Schoepflin. - -03/01/06: beazley - Added "reflags" argument to lex(). For example: - - lex.lex(reflags=re.UNICODE) - - This can be used to specify optional flags to the re.compile() function - used inside the lexer. This may be necessary for special situations such - as processing Unicode (e.g., if you want escapes like \w and \b to consult - the Unicode character property database). The need for this suggested by - Andreas Jung. - -03/01/06: beazley - Fixed a bug with an uninitialized variable on repeated instantiations of parser - objects when the write_tables=0 argument was used. Reported by Michael Brown. - -03/01/06: beazley - Modified lex.py to accept Unicode strings both as the regular expressions for - tokens and as input. Hopefully this is the only change needed for Unicode support. - Patch contributed by Johan Dahl. - -03/01/06: beazley - Modified the class-based interface to work with new-style or old-style classes. - Patch contributed by Michael Brown (although I tweaked it slightly so it would work - with older versions of Python). - -Version 1.6 ------------------------------- -05/27/05: beazley - Incorporated patch contributed by Christopher Stawarz to fix an extremely - devious bug in LALR(1) parser generation. This patch should fix problems - numerous people reported with LALR parsing. - -05/27/05: beazley - Fixed problem with lex.py copy constructor. Reported by Dave Aitel, Aaron Lav, - and Thad Austin. - -05/27/05: beazley - Added outputdir option to yacc() to control output directory. Contributed - by Christopher Stawarz. - -05/27/05: beazley - Added rununit.py test script to run tests using the Python unittest module. - Contributed by Miki Tebeka. - -Version 1.5 ------------------------------- -05/26/04: beazley - Major enhancement. LALR(1) parsing support is now working. - This feature was implemented by Elias Ioup (ezioup@alumni.uchicago.edu) - and optimized by David Beazley. To use LALR(1) parsing do - the following: - - yacc.yacc(method="LALR") - - Computing LALR(1) parsing tables takes about twice as long as - the default SLR method. However, LALR(1) allows you to handle - more complex grammars. For example, the ANSI C grammar - (in example/ansic) has 13 shift-reduce conflicts with SLR, but - only has 1 shift-reduce conflict with LALR(1). - -05/20/04: beazley - Added a __len__ method to parser production lists. 
Can - be used in parser rules like this: - - def p_somerule(p): - """a : B C D - | E F" - if (len(p) == 3): - # Must have been first rule - elif (len(p) == 2): - # Must be second rule - - Suggested by Joshua Gerth and others. - -Version 1.4 ------------------------------- -04/23/04: beazley - Incorporated a variety of patches contributed by Eric Raymond. - These include: - - 0. Cleans up some comments so they don't wrap on an 80-column display. - 1. Directs compiler errors to stderr where they belong. - 2. Implements and documents automatic line counting when \n is ignored. - 3. Changes the way progress messages are dumped when debugging is on. - The new format is both less verbose and conveys more information than - the old, including shift and reduce actions. - -04/23/04: beazley - Added a Python setup.py file to simply installation. Contributed - by Adam Kerrison. - -04/23/04: beazley - Added patches contributed by Adam Kerrison. - - - Some output is now only shown when debugging is enabled. This - means that PLY will be completely silent when not in debugging mode. - - - An optional parameter "write_tables" can be passed to yacc() to - control whether or not parsing tables are written. By default, - it is true, but it can be turned off if you don't want the yacc - table file. Note: disabling this will cause yacc() to regenerate - the parsing table each time. - -04/23/04: beazley - Added patches contributed by David McNab. This patch addes two - features: - - - The parser can be supplied as a class instead of a module. - For an example of this, see the example/classcalc directory. - - - Debugging output can be directed to a filename of the user's - choice. Use - - yacc(debugfile="somefile.out") - - -Version 1.3 ------------------------------- -12/10/02: jmdyck - Various minor adjustments to the code that Dave checked in today. - Updated test/yacc_{inf,unused}.exp to reflect today's changes. - -12/10/02: beazley - Incorporated a variety of minor bug fixes to empty production - handling and infinite recursion checking. Contributed by - Michael Dyck. - -12/10/02: beazley - Removed bogus recover() method call in yacc.restart() - -Version 1.2 ------------------------------- -11/27/02: beazley - Lexer and parser objects are now available as an attribute - of tokens and slices respectively. For example: - - def t_NUMBER(t): - r'\d+' - print t.lexer - - def p_expr_plus(t): - 'expr: expr PLUS expr' - print t.lexer - print t.parser - - This can be used for state management (if needed). - -10/31/02: beazley - Modified yacc.py to work with Python optimize mode. To make - this work, you need to use - - yacc.yacc(optimize=1) - - Furthermore, you need to first run Python in normal mode - to generate the necessary parsetab.py files. After that, - you can use python -O or python -OO. - - Note: optimized mode turns off a lot of error checking. - Only use when you are sure that your grammar is working. - Make sure parsetab.py is up to date! - -10/30/02: beazley - Added cloning of Lexer objects. For example: - - import copy - l = lex.lex() - lc = copy.copy(l) - - l.input("Some text") - lc.input("Some other text") - ... - - This might be useful if the same "lexer" is meant to - be used in different contexts---or if multiple lexers - are running concurrently. - -10/30/02: beazley - Fixed subtle bug with first set computation and empty productions. - Patch submitted by Michael Dyck. - -10/30/02: beazley - Fixed error messages to use "filename:line: message" instead - of "filename:line. message". 
This makes error reporting more - friendly to emacs. Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file. Terminals and nonterminals - are sorted instead of being printed in random order. - Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file output. Rules are now printed - in a way that's easier to understand. Contributed by Russ Cox. - -10/30/02: beazley - Added 'nonassoc' associativity support. This can be used - to disable the chaining of operators like a < b < c. - To use, simply specify 'nonassoc' in the precedence table - - precedence = ( - ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator - ) - - Patch contributed by Russ Cox. - -10/30/02: beazley - Modified the lexer to provide optional support for Python -O and -OO - modes. To make this work, Python *first* needs to be run in - unoptimized mode. This reads the lexing information and creates a - file "lextab.py". Then, run lex like this: - - # module foo.py - ... - ... - lex.lex(optimize=1) - - Once the lextab file has been created, subsequent calls to - lex.lex() will read data from the lextab file instead of using - introspection. In optimized mode (-O, -OO) everything should - work normally despite the loss of doc strings. - - To change the name of the file 'lextab.py' use the following: - - lex.lex(lextab="footab") - - (this creates a file footab.py) - - -Version 1.1 October 25, 2001 ------------------------------- - -10/25/01: beazley - Modified the table generator to produce much more compact data. - This should greatly reduce the size of the parsetab.py[c] file. - Caveat: the tables still need to be constructed so a little more - work is done in parsetab on import. - -10/25/01: beazley - There may be a possible bug in the cycle detector that reports errors - about infinite recursion. I'm having a little trouble tracking it - down, but if you get this problem, you can disable the cycle - detector as follows: - - yacc.yacc(check_recursion = 0) - -10/25/01: beazley - Fixed a bug in lex.py that sometimes caused illegal characters to be - reported incorrectly. Reported by Sverre Jørgensen. - -7/8/01 : beazley - Added a reference to the underlying lexer object when tokens are handled by - functions. The lexer is available as the 'lexer' attribute. This - was added to provide better lexing support for languages such as Fortran - where certain types of tokens can't be conveniently expressed as regular - expressions (and where the tokenizing function may want to perform a - little backtracking). Suggested by Pearu Peterson. - -6/20/01 : beazley - Modified yacc() function so that an optional starting symbol can be specified. - For example: - - yacc.yacc(start="statement") - - Normally yacc always treats the first production rule as the starting symbol. - However, if you are debugging your grammar it may be useful to specify - an alternative starting symbol. Idea suggested by Rich Salz. 
- -Version 1.0 June 18, 2001 --------------------------- -Initial public offering - diff --git a/ply/PKG-INFO b/ply/PKG-INFO deleted file mode 100644 index 0080e02..0000000 --- a/ply/PKG-INFO +++ /dev/null @@ -1,22 +0,0 @@ -Metadata-Version: 1.0 -Name: ply -Version: 3.4 -Summary: Python Lex & Yacc -Home-page: http://www.dabeaz.com/ply/ -Author: David Beazley -Author-email: dave@dabeaz.com -License: BSD -Description: - PLY is yet another implementation of lex and yacc for Python. Some notable - features include the fact that its implemented entirely in Python and it - uses LALR(1) parsing which is efficient and well suited for larger grammars. - - PLY provides most of the standard lex/yacc features including support for empty - productions, precedence rules, error recovery, and support for ambiguous grammars. - - PLY is extremely easy to use and provides very extensive error checking. - It is compatible with both Python 2 and Python 3. - -Platform: UNKNOWN -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 2 diff --git a/ply/README b/ply/README deleted file mode 100644 index f384d1a..0000000 --- a/ply/README +++ /dev/null @@ -1,271 +0,0 @@ -PLY (Python Lex-Yacc) Version 3.4 - -Copyright (C) 2001-2011, -David M. Beazley (Dabeaz LLC) -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -* Neither the name of the David Beazley or Dabeaz LLC may be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Introduction -============ - -PLY is a 100% Python implementation of the common parsing tools lex -and yacc. Here are a few highlights: - - - PLY is very closely modeled after traditional lex/yacc. - If you know how to use these tools in C, you will find PLY - to be similar. - - - PLY provides *very* extensive error reporting and diagnostic - information to assist in parser construction. The original - implementation was developed for instructional purposes. As - a result, the system tries to identify the most common types - of errors made by novice users. - - - PLY provides full support for empty productions, error recovery, - precedence specifiers, and moderately ambiguous grammars. 
- - - Parsing is based on LR-parsing which is fast, memory efficient, - better suited to large grammars, and which has a number of nice - properties when dealing with syntax errors and other parsing problems. - Currently, PLY builds its parsing tables using the LALR(1) - algorithm used in yacc. - - - PLY uses Python introspection features to build lexers and parsers. - This greatly simplifies the task of parser construction since it reduces - the number of files and eliminates the need to run a separate lex/yacc - tool before running your program. - - - PLY can be used to build parsers for "real" programming languages. - Although it is not ultra-fast due to its Python implementation, - PLY can be used to parse grammars consisting of several hundred - rules (as might be found for a language like C). The lexer and LR - parser are also reasonably efficient when parsing typically - sized programs. People have used PLY to build parsers for - C, C++, ADA, and other real programming languages. - -How to Use -========== - -PLY consists of two files : lex.py and yacc.py. These are contained -within the 'ply' directory which may also be used as a Python package. -To use PLY, simply copy the 'ply' directory to your project and import -lex and yacc from the associated 'ply' package. For example: - - import ply.lex as lex - import ply.yacc as yacc - -Alternatively, you can copy just the files lex.py and yacc.py -individually and use them as modules. For example: - - import lex - import yacc - -The file setup.py can be used to install ply using distutils. - -The file doc/ply.html contains complete documentation on how to use -the system. - -The example directory contains several different examples including a -PLY specification for ANSI C as given in K&R 2nd Ed. - -A simple example is found at the end of this document - -Requirements -============ -PLY requires the use of Python 2.2 or greater. However, you should -use the latest Python release if possible. It should work on just -about any platform. PLY has been tested with both CPython and Jython. -It also seems to work with IronPython. - -Resources -========= -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply - -For a detailed overview of parsing theory, consult the excellent -book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and -Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown -may also be useful. - -A Google group for PLY can be found at - - http://groups.google.com/group/ply-hack - -Acknowledgments -=============== -A special thanks is in order for all of the students in CS326 who -suffered through about 25 different versions of these tools :-). - -The CHANGES file acknowledges those who have contributed patches. - -Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x. -Andrew Waters and Markus Schoepflin were instrumental in reporting bugs -and testing a revised LALR(1) implementation for PLY-2.0. - -Special Note for PLY-3.0 -======================== -PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual -Python 2/3 compatibility by restricting its implementation to a common -subset of basic language features. You should not convert PLY using -2to3--it is not necessary and may in fact break the implementation. - -Example -======= - -Here is a simple example showing a PLY implementation of a calculator -with variables. 
- -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Ignored characters -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names (for storing variables) -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - print("Syntax error at '%s'" % p.value) - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') # use input() on Python 3 - except EOFError: - break - yacc.parse(s) - - -Bug Reports and Patches -======================= -My goal with PLY is to simply have a decent lex/yacc implementation -for Python. As a general rule, I don't spend huge amounts of time -working on it unless I receive very specific bug reports and/or -patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack - --- Dave - - - - - - - - - diff --git a/ply/TODO b/ply/TODO deleted file mode 100644 index f4800aa..0000000 --- a/ply/TODO +++ /dev/null @@ -1,16 +0,0 @@ -The PLY to-do list: - -1. Finish writing the C Preprocessor module. Started in the - file ply/cpp.py - -2. Create and document libraries of useful tokens. - -3. Expand the examples/yply tool that parses bison/yacc - files. - -4. Think of various diabolical things to do with the - new yacc internals. For example, it is now possible - to specify grammrs using completely different schemes - than the reflection approach used by PLY. - - diff --git a/ply/doc/internal.html b/ply/doc/internal.html deleted file mode 100644 index 3fabfe2..0000000 --- a/ply/doc/internal.html +++ /dev/null @@ -1,874 +0,0 @@ - - -PLY Internals - - - -PLY Internals
- - -David M. Beazley
-dave@dabeaz.com
- - --PLY Version: 3.0 -
- - -
- -- - - -1. Introduction
- - -This document describes classes and functions that make up the internal -operation of PLY. Using this programming interface, it is possible to -manually build a parser using a different interface specification -than what PLY normally uses. For example, you could build a grammar -from information parsed in a completely different input format. Some of -these objects may be useful for building more advanced parsing engines -such as GLR. - --It should be stressed that using PLY at this level is not for the -faint of heart. Generally, it's assumed that you know a bit of -the underlying compiler theory and how an LR parser is put together. - -
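Before looking at the individual classes, here is a minimal illustrative sketch (not part of the original PLY sources) of driving this internal interface directly. It assumes PLY 3.x, where ply.yacc exposes the Grammar and LRGeneratedTable classes described in the sections that follow; the two-rule expression grammar is made up for the example.

    from ply.yacc import Grammar, LRGeneratedTable

    g = Grammar(['NUMBER', 'PLUS'])        # terminal names
    g.set_precedence('PLUS', 'left', 1)    # must be called before add_production()
    g.add_production('expr', ['expr', 'PLUS', 'expr'])
    g.add_production('expr', ['NUMBER'])
    g.set_start('expr')                    # must follow all add_production() calls
    g.build_lritems()

    # Run the LALR(1) algorithm over the grammar and report any conflicts.
    tables = LRGeneratedTable(g, method='LALR')
    print("shift/reduce conflicts: %d" % len(tables.sr_conflicts))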
2. Grammar Class
- - -The file ply.yacc defines a class Grammar that -is used to hold and manipulate information about a grammar -specification. It encapsulates the same basic information -about a grammar that is put into a YACC file including -the list of tokens, precedence rules, and grammar rules. -Various operations are provided to perform different validations -on the grammar. In addition, there are operations to compute -the first and follow sets that are needed by the various table -generation algorithms. - --Grammar(terminals) - -
-Creates a new grammar object. terminals is a list of strings -specifying the terminals for the grammar. An instance g of -Grammar has the following methods: -- --g.set_precedence(term,assoc,level) -
-Sets the precedence level and associativity for a given terminal term. -assoc is one of 'right', -'left', or 'nonassoc' and level is a positive integer. The higher -the value of level, the higher the precedence. Here is an example of typical -precedence settings: - ---g.set_precedence('PLUS', 'left',1) -g.set_precedence('MINUS', 'left',1) -g.set_precedence('TIMES', 'left',2) -g.set_precedence('DIVIDE','left',2) -g.set_precedence('UMINUS','left',3) -- -This method must be called prior to adding any productions to the -grammar with g.add_production(). The precedence of individual grammar -rules is determined by the precedence of the right-most terminal. - --g.add_production(name,syms,func=None,file='',line=0) -
-Adds a new grammar rule. name is the name of the rule, -syms is a list of symbols making up the right hand -side of the rule, func is the function to call when -reducing the rule. file and line specify -the filename and line number of the rule and are used for -generating error messages. - -- --The list of symbols in syms may include character -literals and %prec specifiers. Here are some -examples: - -
-g.add_production('expr',['expr','PLUS','term'],func,file,line) -g.add_production('expr',['expr','"+"','term'],func,file,line) -g.add_production('expr',['MINUS','expr','%prec','UMINUS'],func,file,line) -- --If any kind of error is detected, a GrammarError exception -is raised with a message indicating the reason for the failure. -
-g.set_start(start=None) -
-Sets the starting rule for the grammar. start is a string -specifying the name of the start rule. If start is omitted, -the first grammar rule added with add_production() is taken to be -the starting rule. This method must always be called after all -productions have been added. -- --g.find_unreachable() -
-Diagnostic function. Returns a list of all unreachable non-terminals -defined in the grammar. This is used to identify inactive parts of -the grammar specification. -- --g.infinite_cycle() -
-Diagnostic function. Returns a list of all non-terminals in the -grammar that result in an infinite cycle. This condition occurs if -there is no way for a grammar rule to expand to a string containing -only terminal symbols. -- --g.undefined_symbols() -
-Diagnostic function. Returns a list of tuples (name, prod) -corresponding to undefined symbols in the grammar. name is the -name of the undefined symbol and prod is an instance of -Production which has information about the production rule -where the undefined symbol was used. -- --g.unused_terminals() -
-Diagnostic function. Returns a list of terminals that were defined, -but never used in the grammar. -- --g.unused_rules() -
-Diagnostic function. Returns a list of Production instances -corresponding to production rules that were defined in the grammar, -but never used anywhere. This is slightly different -than find_unreachable(). -- --g.unused_precedence() -
-Diagnostic function. Returns a list of tuples (term, assoc) -corresponding to precedence rules that were set, but never used in the -grammar. term is the terminal name and assoc is the -precedence associativity (e.g., 'left', 'right', -or 'nonassoc'). -- --g.compute_first() -
-Compute all of the first sets for all symbols in the grammar. Returns a dictionary -mapping symbol names to a list of all first symbols. -- --g.compute_follow() -
-Compute all of the follow sets for all non-terminals in the grammar. -The follow set is the set of all possible symbols that might follow a -given non-terminal. Returns a dictionary mapping non-terminal names -to a list of symbols. -- --g.build_lritems() -
-Calculates all of the LR items for all productions in the grammar. This -step is required before using the grammar for any kind of table generation. -See the section on LR items below. -- --The following attributes are set by the above methods and may be useful -in code that works with the grammar. All of these attributes should be -assumed to be read-only. Changing their values directly will likely -break the grammar. - -
-g.Productions -
-A list of all productions added. The first entry is reserved for -a production representing the starting rule. The objects in this list -are instances of the Production class, described shortly. -- --g.Prodnames -
-A dictionary mapping the names of nonterminals to a list of all -productions of that nonterminal. -- --g.Terminals -
-A dictionary mapping the names of terminals to a list of the -production numbers where they are used. -- --g.Nonterminals -
-A dictionary mapping the names of nonterminals to a list of the -production numbers where they are used. -- --g.First -
-A dictionary representing the first sets for all grammar symbols. This is -computed and returned by the compute_first() method. -- --g.Follow -
-A dictionary representing the follow sets for all grammar rules. This is -computed and returned by the compute_follow() method. -- --g.Start -
-Starting symbol for the grammar. Set by the set_start() method. -- -For the purposes of debugging, a Grammar object supports the __len__() and -__getitem__() special methods. Accessing g[n] returns the nth production -from the grammar. - - -3. Productions
- - -Grammar objects store grammar rules as instances of a Production class. This -class has no public constructor--you should only create productions by calling Grammar.add_production(). -The following attributes are available on a Production instance p. - --p.name -
-The name of the production. For a grammar rule such as A : B C D, this is 'A'. -- --p.prod -
-A tuple of symbols making up the right-hand side of the production. For a grammar rule such as A : B C D, this is ('B','C','D'). -- --p.number -
-Production number. An integer containing the index of the production in the grammar's Productions list. -- --p.func -
-The name of the reduction function associated with the production. -This is the function that will execute when reducing the entire -grammar rule during parsing. -- --p.callable -
-The callable object associated with the name in p.func. This is None -unless the production has been bound using bind(). -- --p.file -
-Filename associated with the production. Typically this is the file where the production was defined. Used for error messages. -- --p.lineno -
-Line number associated with the production. Typically this is the line number in p.file where the production was defined. Used for error messages. -- --p.prec -
-Precedence and associativity associated with the production. This is a tuple (assoc,level) where -assoc is one of 'left','right', or 'nonassoc' and level is -an integer. This value is determined by the precedence of the right-most terminal symbol in the production -or by use of the %prec specifier when adding the production. -- --p.usyms -
-A list of all unique symbols found in the production. -- --p.lr_items -
-A list of all LR items for this production. This attribute only has a meaningful value if the -Grammar.build_lritems() method has been called. The items in this list are -instances of LRItem described below. -- --p.lr_next -
-The head of a linked-list representation of the LR items in p.lr_items. -This attribute only has a meaningful value if the Grammar.build_lritems() -method has been called. Each LRItem instance has a lr_next attribute -to move to the next item. The list is terminated by None. -- --p.bind(dict) -
-Binds the production function name in p.func to a callable object in -dict. This operation is typically carried out in the last step -prior to running the parsing engine and is needed since parsing tables are typically -read from files which only include the function names, not the functions themselves. -- --Production objects support -the __len__(), __getitem__(), and __str__() -special methods. -len(p) returns the number of symbols in p.prod -and p[n] is the same as p.prod[n]. - -
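As a brief illustration (not from the original document), the attributes listed above can be inspected directly on a populated grammar. The snippet assumes a Grammar instance g built as in the earlier sketch; only attribute names described above are used.

    for p in g.Productions[1:]:            # entry 0 is the implicit start production
        print("%s -> %s" % (p.name, " ".join(p.prod)))
        print("    number=%d  prec=%r  usyms=%r" % (p.number, p.prec, p.usyms))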
4. LRItems
- - -The construction of parsing tables in an LR-based parser generator is primarily -done over a set of "LR Items". An LR item represents a stage of parsing one -of the grammar rules. To compute the LR items, it is first necessary to -call Grammar.build_lritems(). Once this step is complete, all of the productions -in the grammar will have their LR items attached to them. - --Here is an interactive example that shows what LR items look like if you -experiment interactively. In this example, g is a Grammar -object. - 
-- -In the above code, p represents the first grammar rule. In -this case, a rule 'statement -> ID = expr'. - -->>> g.build_lritems() ->>> p = g[1] ->>> p -Production(statement -> ID = expr) ->>> ---Now, let's look at the LR items for p. - -
-- -In each LR item, the dot (.) represents a specific stage of parsing. In each LR item, the dot -is advanced by one symbol. It is only when the dot reaches the very end that a production -is successfully parsed. - -->>> p.lr_items -[LRItem(statement -> . ID = expr), - LRItem(statement -> ID . = expr), - LRItem(statement -> ID = . expr), - LRItem(statement -> ID = expr .)] ->>> ---An instance lr of LRItem has the following -attributes that hold information related to that specific stage of -parsing. - -
-lr.name -
-The name of the grammar rule. For example, 'statement' in the above example. -- --lr.prod -
-A tuple of symbols representing the right-hand side of the production, including the -special '.' character. For example, ('ID','.','=','expr'). -- --lr.number -
-An integer representing the production number in the grammar. -- --lr.usyms -
-A set of unique symbols in the production. Inherited from the original Production instance. -- --lr.lr_index -
-An integer representing the position of the dot (.). You should never use lr.prod.index() -to search for it--the result will be wrong if the grammar happens to also use (.) as a character -literal. -- --lr.lr_after -
-A list of all productions that can legally appear immediately to the right of the -dot (.). This list contains Production instances. This attribute -represents all of the possible branches a parse can take from the current position. -For example, suppose that lr represents a stage immediately before -an expression like this: - -- -->>> lr -LRItem(statement -> ID = . expr) ->>> -- -Then, the value of lr.lr_after might look like this, showing all productions that -can legally appear next: - -->>> lr.lr_after -[Production(expr -> expr PLUS expr), - Production(expr -> expr MINUS expr), - Production(expr -> expr TIMES expr), - Production(expr -> expr DIVIDE expr), - Production(expr -> MINUS expr), - Production(expr -> LPAREN expr RPAREN), - Production(expr -> NUMBER), - Production(expr -> ID)] ->>> -- --lr.lr_before -
-The grammar symbol that appears immediately before the dot (.) or None if -at the beginning of the parse. -- --lr.lr_next -
-A link to the next LR item, representing the next stage of the parse. None if lr -is the last LR item. -- -LRItem instances also support the __len__() and __getitem__() special methods. -len(lr) returns the number of items in lr.prod including the dot (.). lr[n] -returns lr.prod[n]. - --It goes without saying that all of the attributes associated with LR -items should be assumed to be read-only. Modifications will very -likely create a small black-hole that will consume you and your code. - -
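The linked-list form described above can be walked directly. This sketch is illustrative only; it assumes the hand-built Grammar g from the earlier example, with g.build_lritems() already called.

    item = g.Productions[1].lr_next        # first LR item of the first real rule
    while item is not None:
        print(item)                        # e.g. LRItem(expr -> . expr PLUS expr)
        print("    before dot: %r  after dot: %r" % (item.lr_before, item.lr_after))
        item = item.lr_next                # chain ends with None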
5. LRTable
- - -The LRTable class is used to represent LR parsing table data. This -minimally includes the production list, action table, and goto table. - --LRTable() -
-Create an empty LRTable object. This object contains only the information needed to -run an LR parser. -- -An instance lrtab of LRTable has the following methods: - --lrtab.read_table(module) -
-Populates the LR table with information from the module specified in module. -module is either a module object already loaded with import or -the name of a Python module. If it's a string containing a module name, it is -loaded and parsing data is extracted. Returns the signature value that was used -when initially writing the tables. Raises a VersionError exception if -the module was created using an incompatible version of PLY. -- --lrtab.bind_callables(dict) -
-This binds all of the function names used in productions to callable objects -found in the dictionary dict. During table generation and when reading -LR tables from files, PLY only uses the names of action functions such as 'p_expr', -'p_statement', etc. In order to actually run the parser, these names -have to be bound to callable objects. This method is always called prior to -running a parser. -- -After lrtab has been populated, the following attributes are defined. - --lrtab.lr_method -
-The LR parsing method used (e.g., 'LALR') -- - --lrtab.lr_productions -
-The production list. If the parsing tables have been newly -constructed, this will be a list of Production instances. If -the parsing tables have been read from a file, it's a list -of MiniProduction instances. This, together -with lr_action and lr_goto contain all of the -information needed by the LR parsing engine. -- --lrtab.lr_action -
-The LR action dictionary that implements the underlying state machine. -The keys of this dictionary are the LR states. -- --lrtab.lr_goto -
-The LR goto table that contains information about grammar rule reductions. -- - -6. LRGeneratedTable
- - -The LRGeneratedTable class represents constructed LR parsing tables on a -grammar. It is a subclass of LRTable. - --LRGeneratedTable(grammar, method='LALR',log=None) -
-Create the LR parsing tables on a grammar. grammar is an instance of Grammar, -method is a string with the parsing method ('SLR' or 'LALR'), and -log is a logger object used to write debugging information. The debugging information -written to log is the same as what appears in the parser.out file created -by yacc. By supplying a custom logger with a different message format, it is possible to get -more information (e.g., the line number in yacc.py used for issuing each line of -output in the log). The result is an instance of LRGeneratedTable. -- --An instance lr of LRGeneratedTable has the following attributes. - -
-lr.grammar -
-A link to the Grammar object used to construct the parsing tables. -- --lr.lr_method -
-The LR parsing method used (e.g., 'LALR') -- - --lr.lr_productions -
-A reference to grammar.Productions. This, together with lr_action and lr_goto -contain all of the information needed by the LR parsing engine. -- --lr.lr_action -
-The LR action dictionary that implements the underlying state machine. The keys of this dictionary are -the LR states. -- --lr.lr_goto -
-The LR goto table that contains information about grammar rule reductions. -- --lr.sr_conflicts -
-A list of tuples (state,token,resolution) identifying all shift/reduce conflicts. state is the LR state -number where the conflict occurred, token is the token causing the conflict, and resolution is -a string describing the resolution taken. resolution is either 'shift' or 'reduce'. -- --lr.rr_conflicts -
-A list of tuples (state,rule,rejected) identifying all reduce/reduce conflicts. state is the -LR state number where the conflict occurred, rule is the production rule that was selected -and rejected is the production rule that was rejected. Both rule and rejected are -instances of Production. They can be inspected to provide the user with more information. -- --There are two public methods of LRGeneratedTable. - -
-lr.write_table(modulename,outputdir="",signature="") -
-Writes the LR parsing table information to a Python module. modulename is a string -specifying the name of a module such as "parsetab". outputdir is the name of a -directory where the module should be created. signature is a string representing a -grammar signature that's written into the output file. This can be used to detect when -the data stored in a module file is out-of-sync with the grammar specification (and that -the tables need to be regenerated). If modulename is a string "parsetab", -this function creates a file called parsetab.py. If the module name represents a -package such as "foo.bar.parsetab", then only the last component, "parsetab", is -used. -- - -7. LRParser
- - -The LRParser class implements the low-level LR parsing engine. - - --LRParser(lrtab, error_func) -
-Create an LRParser. lrtab is an instance of LRTable -containing the LR production and state tables. error_func is the -error function to invoke in the event of a parsing error. -- -An instance p of LRParser has the following methods: - --p.parse(input=None,lexer=None,debug=0,tracking=0,tokenfunc=None) -
-Run the parser. input is a string, which if supplied is fed into the -lexer using its input() method. lexer is an instance of the -Lexer class to use for tokenizing. If not supplied, the last lexer -created with the lex module is used. debug is a boolean flag -that enables debugging. tracking is a boolean flag that tells the -parser to perform additional line number tracking. tokenfunc is a callable -function that returns the next token. If supplied, the parser will use it to get -all tokens. -- --p.restart() -
-Resets the parser state for a parse already in progress. -- -8. ParserReflect
- - --The ParserReflect class is used to collect parser specification data -from a Python module or object. This class is what collects all of the -p_rule() functions in a PLY file, performs basic error checking, -and collects all of the needed information to build a grammar. Most of the -high-level PLY interface as used by the yacc() function is actually -implemented by this class. - -
-ParserReflect(pdict, log=None) -
-Creates a ParserReflect instance. pdict is a dictionary -containing parser specification data. This dictionary typically corresponds -to the module or class dictionary of code that implements a PLY parser. -log is a logger instance that will be used to report error -messages. -- -An instance p of ParserReflect has the following methods: - --p.get_all() -
-Collect and store all required parsing information. -- --p.validate_all() -
-Validate all of the collected parsing information. This is a separate step -from p.get_all() as a performance optimization. To reduce -parser start-up time, a parser can elect to only validate the -parsing data when regenerating the parsing tables. The validation -step tries to collect as much information as possible rather than -raising an exception at the first sign of trouble. The attribute -p.error is set if there are any validation errors. The -value of this attribute is also returned. -- --p.signature() -
-Compute a signature representing the contents of the collected parsing -data. The signature value should change if anything in the parser -specification has changed in a way that would justify parser table -regeneration. This method can be called after p.get_all(), -but before p.validate_all(). -- -The following attributes are set in the process of collecting data: - --p.start -
-The grammar start symbol, if any. Taken from pdict['start']. -- --p.error_func -
-The error handling function or None. Taken from pdict['p_error']. -- --p.tokens -
-The token list. Taken from pdict['tokens']. -- --p.prec -
-The precedence specifier. Taken from pdict['precedence']. -- --p.preclist -
-A parsed version of the precedence specified. A list of tuples of the form -(token,assoc,level) where token is the terminal symbol, -assoc is the associativity (e.g., 'left') and level -is a numeric precedence level. -- --p.grammar -
-A list of tuples (name, rules) representing the grammar rules. name is the -name of a Python function or method in pdict that starts with "p_". -rules is a list of tuples (filename,line,prodname,syms) representing -the grammar rules found in the documentation string of that function. filename and line contain location -information that can be used for debugging. prodname is the name of the -production. syms is the right-hand side of the production. If you have a -function like this - -- --def p_expr(p): - '''expr : expr PLUS expr - | expr MINUS expr - | expr TIMES expr - | expr DIVIDE expr''' -- -then the corresponding entry in p.grammar might look like this: - --('p_expr', [ ('calc.py',10,'expr', ['expr','PLUS','expr']), - ('calc.py',11,'expr', ['expr','MINUS','expr']), - ('calc.py',12,'expr', ['expr','TIMES','expr']), - ('calc.py',13,'expr', ['expr','DIVIDE','expr']) - ]) ---p.pfuncs -
-A sorted list of tuples (line, file, name, doc) representing all of -the p_ functions found. line and file give location -information. name is the name of the function. doc is the -documentation string. This list is sorted in ascending order by line number. -- --p.files -
-A dictionary holding all of the source filenames that were encountered -while collecting parser information. Only the keys of this dictionary have -any meaning. -- --p.error -
-An attribute that indicates whether or not any critical errors -occurred in validation. If this is set, it means that some kind -of problem was detected and that no further processing should be -performed. -- - -9. High-level operation
- - -Using all of the above classes requires some attention to detail. The yacc() -function carries out a very specific sequence of operations to create a grammar. -This same sequence should be emulated if you build an alternative PLY interface. - --
- - - - - - - - - - diff --git a/ply/doc/makedoc.py b/ply/doc/makedoc.py deleted file mode 100644 index 415a53a..0000000 --- a/ply/doc/makedoc.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/local/bin/python - -############################################################################### -# Takes a chapter as input and adds internal links and numbering to all -# of the H1, H2, H3, H4 and H5 sections. -# -# Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link -# to. However, if the name is not an autogenerated name from a previous run, -# it will be kept. If it is autogenerated, it might change on subsequent runs -# of this program. Thus if you want to create links to one of the headings, -# then change the heading link name to something that does not look like an -# autogenerated link name. -############################################################################### - -import sys -import re -import string - -############################################################################### -# Functions -############################################################################### - -# Regexs for -alink = re.compile(r"", re.IGNORECASE) -heading = re.compile(r"(_nn\d)", re.IGNORECASE) - -def getheadingname(m): - autogeneratedheading = True; - if m.group(1) != None: - amatch = alink.match(m.group(1)) - if amatch: - # A non-autogenerated heading - keep it - headingname = amatch.group(1) - autogeneratedheading = heading.match(headingname) - if autogeneratedheading: - # The heading name was either non-existent or autogenerated, - # We can create a new heading / change the existing heading - headingname = "%s_nn%d" % (filenamebase, nameindex) - return headingname - -############################################################################### -# Main program -############################################################################### - -if len(sys.argv) != 2: - print "usage: makedoc.py filename" - sys.exit(1) - -filename = sys.argv[1] -filenamebase = string.split(filename,".")[0] - -section = 0 -subsection = 0 -subsubsection = 0 -subsubsubsection = 0 -nameindex = 0 - -name = "" - -# Regexs for- A ParserReflect object is created and raw grammar specification data is -collected. -
- A Grammar object is created and populated with information -from the specification data. -
- A LRGenerator object is created to run the LALR algorithm over -the Grammar object. -
- Productions in the LRGenerator and bound to callables using the bind_callables() -method. -
- A LRParser object is created from from the information in the -LRGenerator object. -
,...
sections - -h1 = re.compile(r".*?
(
", re.IGNORECASE) -h2 = re.compile(r".*?)*[\d\.\s]*(.*?) (
", re.IGNORECASE) -h3 = re.compile(r".*?)*[\d\.\s]*(.*?) (
", re.IGNORECASE) -h4 = re.compile(r".*?)*[\d\.\s]*(.*?) (
", re.IGNORECASE) -h5 = re.compile(r".*?)*[\d\.\s]*(.*?) (
", re.IGNORECASE) - -data = open(filename).read() # Read data -open(filename+".bak","w").write(data) # Make backup - -lines = data.splitlines() -result = [ ] # This is the result of postprocessing the file -index = "\n)*[\d\.\s]*(.*?) \n" # index contains the index for adding at the top of the file. Also printed to stdout. - -skip = 0 -skipspace = 0 - -for s in lines: - if s == "": - if not skip: - result.append("@INDEX@") - skip = 1 - else: - skip = 0 - continue; - if skip: - continue - - if not s and skipspace: - continue - - if skipspace: - result.append("") - result.append("") - skipspace = 0 - - m = h2.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - section += 1 - headingname = getheadingname(m) - result.append("""\n\n" - -data = "\n".join(result) - -data = data.replace("@INDEX@",index) + "\n"; - -# Write the file back out -open(filename,"w").write(data) - - diff --git a/ply/doc/ply.html b/ply/doc/ply.html deleted file mode 100644 index fdcd88a..0000000 --- a/ply/doc/ply.html +++ /dev/null @@ -1,3262 +0,0 @@ - - -%d. %s
""" % (headingname,section, prevheadingtext)) - - if subsubsubsection: - index += "\n" - if subsubsection: - index += "\n" - if subsection: - index += "\n" - if section == 1: - index += "\n" - - index += """
\n" - if subsubsection: - index += "\n" - if subsection == 1: - index += "- %s\n""" % (headingname,prevheadingtext) - subsection = 0 - subsubsection = 0 - subsubsubsection = 0 - skipspace = 1 - continue - m = h3.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsection += 1 - headingname = getheadingname(m) - result.append("""
%d.%d %s
""" % (headingname,section, subsection, prevheadingtext)) - - if subsubsubsection: - index += "\n" - - index += """
\n" - if subsubsection == 1: - index += "- %s\n""" % (headingname,prevheadingtext) - subsubsection = 0 - skipspace = 1 - continue - m = h4.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsection += 1 - subsubsubsection = 0 - headingname = getheadingname(m) - result.append("""
%d.%d.%d %s
""" % (headingname,section, subsection, subsubsection, prevheadingtext)) - - if subsubsubsection: - index += "\n" - - index += """
\n" - -if subsection: - index += "\n" - -if section: - index += "\n" - -index += "- %s\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - m = h5.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsubsection += 1 - headingname = getheadingname(m) - result.append("""
%d.%d.%d.%d %s
""" % (headingname,section, subsection, subsubsection, subsubsubsection, prevheadingtext)) - - if subsubsubsection == 1: - index += "\n" - - index += """
\n" - -if subsubsection: - index += "- %s\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - - result.append(s) - -if subsubsubsection: - index += "
PLY (Python Lex-Yacc) - - - -PLY (Python Lex-Yacc)
- - -David M. Beazley
-dave@dabeaz.com
- - --PLY Version: 3.4 -
- - -
-- - - - --
-- Preface and Requirements -
- Introduction -
- PLY Overview -
- Lex -
-
-- Lex Example -
- The tokens list -
- Specification of tokens -
- Token values -
- Discarded tokens -
- Line numbers and positional information -
- Ignored characters -
- Literal characters -
- Error handling -
- Building and using the lexer -
- The @TOKEN decorator -
- Optimized mode -
- Debugging -
- Alternative specification of lexers -
- Maintaining state -
- Lexer cloning -
- Internal lexer state -
- Conditional lexing and start conditions -
- Miscellaneous Issues -
- Parsing basics -
- Yacc - -
- Multiple Parsers and Lexers -
- Using Python's Optimized Mode -
- Advanced Debugging - -
- Where to go from here? -
1. Preface and Requirements
- - --This document provides an overview of lexing and parsing with PLY. -Given the intrinsic complexity of parsing, I would strongly advise -that you read (or at least skim) this entire document before jumping -into a big development project with PLY. -
- --PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that -Python 3 support is new and has not been extensively tested (although -all of the examples and unit tests pass under Python 3.0). If you are -using Python 2, you should try to use Python 2.4 or newer. Although PLY -works with versions as far back as Python 2.2, some of its optional features -require more modern library modules. -
- -2. Introduction
- - -PLY is a pure-Python implementation of the popular compiler -construction tools lex and yacc. The main goal of PLY is to stay -fairly faithful to the way in which traditional lex/yacc tools work. -This includes supporting LALR(1) parsing as well as providing -extensive input validation, error reporting, and diagnostics. Thus, -if you've used yacc in another programming language, it should be -relatively straightforward to use PLY. - --Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. Since 2001, PLY has -continued to be improved as feedback has been received from users. -PLY-3.0 represents a major refactoring of the original implementation -with an eye towards future enhancements. - -
-Since PLY was primarily developed as an instructional tool, you will -find it to be fairly picky about token and grammar rule -specification. In part, this -added formality is meant to catch common programming mistakes made by -novice users. However, advanced users will also find such features to -be useful when building complicated grammars for real programming -languages. It should also be noted that PLY does not provide much in -the way of bells and whistles (e.g., automatic construction of -abstract syntax trees, tree traversal, etc.). Nor would I consider it -to be a parsing framework. Instead, you will find a bare-bones, yet -fully capable lex/yacc implementation written entirely in Python. - -
-The rest of this document assumes that you are somewhat familiar with
-parsing theory, syntax directed translation, and the use of compiler
-construction tools such as lex and yacc in other programming
-languages.   If you are unfamiliar with these topics, you will probably
-want to consult an introductory text such as "Compilers: Principles,
-Techniques, and Tools", by Aho, Sethi, and Ullman.  O'Reilly's "Lex
-and Yacc" by John Levine may also be handy.  In fact, the O'Reilly book can be
-used as a reference for PLY as the concepts are virtually identical.
-
3. PLY Overview
- - -PLY consists of two separate modules; lex.py and -yacc.py, both of which are found in a Python package -called ply. The lex.py module is used to break input text into a -collection of tokens specified by a collection of regular expression -rules. yacc.py is used to recognize language syntax that has -been specified in the form of a context free grammar. yacc.py uses LR parsing and generates its parsing tables -using either the LALR(1) (the default) or SLR table generation algorithms. - --The two tools are meant to work together. Specifically, -lex.py provides an external interface in the form of a -token() function that returns the next valid token on the -input stream. yacc.py calls this repeatedly to retrieve -tokens and invoke grammar rules. The output of yacc.py is -often an Abstract Syntax Tree (AST). However, this is entirely up to -the user. If desired, yacc.py can also be used to implement -simple one-pass compilers. - -
-Like its Unix counterpart, yacc.py provides most of the -features you expect including extensive error checking, grammar -validation, support for empty productions, error tokens, and ambiguity -resolution via precedence rules. In fact, everything that is possible in traditional yacc -should be supported in PLY. - -
-The primary difference between -yacc.py and Unix yacc is that yacc.py -doesn't involve a separate code-generation process. -Instead, PLY relies on reflection (introspection) -to build its lexers and parsers. Unlike traditional lex/yacc which -require a special input file that is converted into a separate source -file, the specifications given to PLY are valid Python -programs. This means that there are no extra source files nor is -there a special compiler construction step (e.g., running yacc to -generate Python code for the compiler). Since the generation of the -parsing tables is relatively expensive, PLY caches the results and -saves them to a file. If no changes are detected in the input source, -the tables are read from the cache. Otherwise, they are regenerated. - -
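
The caching described above can also be steered when the tables are built. The snippet below is only a minimal sketch: it assumes the grammar rules of a calculator-style parser are already defined in the calling module, and the keyword arguments shown (tabmodule, outputdir, write_tables, debug) are the usual PLY-3.x options with purely illustrative file names; consult the reference material for the authoritative list in your version.

import ply.yacc as yacc

# First run: the tables are computed and written to gen/calc_tab.py.
# Later runs import the cached module instead, unless the grammar changed.
parser = yacc.yacc(tabmodule='calc_tab',    # name of the cached table module
                   outputdir='gen',         # directory for the generated files
                   write_tables=True,       # set to False to disable the cache
                   debug=False)             # skip writing the parser.out file
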
4. Lex
-
-
-lex.py is used to tokenize an input string.  For example, suppose
-you're writing a programming language and a user supplied the following input string:
-
-
-x = 3 + 42 * (s - t)
-
-
-A tokenizer splits the string into individual tokens
-
-
-'x','=', '3', '+', '42', '*', '(', 's', '-', 't', ')'
-
-
-Tokens are usually given names to indicate what they are. For example:
-
-
-'ID','EQUALS','NUMBER','PLUS','NUMBER','TIMES',
-'LPAREN','ID','MINUS','ID','RPAREN'
-
-
-More specifically, the input is broken into pairs of token types and values.  For example:
-
-
-('ID','x'), ('EQUALS','='), ('NUMBER','3'),
-('PLUS','+'), ('NUMBER','42'), ('TIMES','*'),
-('LPAREN','('), ('ID','s'), ('MINUS','-'),
-('ID','t'), ('RPAREN',')'
-
-
-The identification of tokens is typically done by writing a series of regular expression
-rules.  The next section shows how this is done using lex.py.
-
-
4.1 Lex Example
- - -The following example shows how lex.py is used to write a simple tokenizer. - ---To use the lexer, you first need to feed it some input text using -its input() method. After that, repeated calls -to token() produce tokens. The following code shows how this -works: - --# ------------------------------------------------------------ -# calclex.py -# -# tokenizer for a simple expression evaluator for -# numbers and +,-,*,/ -# ------------------------------------------------------------ -import ply.lex as lex - -# List of token names. This is always required -tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'LPAREN', - 'RPAREN', -) - -# Regular expression rules for simple tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' - -# A regular expression rule with some action code -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Define a rule so we can track line numbers -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - -# A string containing ignored characters (spaces and tabs) -t_ignore = ' \t' - -# Error handling rule -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -lexer = lex.lex() - ---- -When executed, the example will produce the following output: - -- -# Test it out -data = ''' -3 + 4 * 10 - + -20 *2 -''' - -# Give the lexer some input -lexer.input(data) - -# Tokenize -while True: - tok = lexer.token() - if not tok: break # No more input - print tok ---- -Lexers also support the iteration protocol. So, you can write the above loop as follows: - --$ python example.py -LexToken(NUMBER,3,2,1) -LexToken(PLUS,'+',2,3) -LexToken(NUMBER,4,2,5) -LexToken(TIMES,'*',2,7) -LexToken(NUMBER,10,2,10) -LexToken(PLUS,'+',3,14) -LexToken(MINUS,'-',3,16) -LexToken(NUMBER,20,3,18) -LexToken(TIMES,'*',3,20) -LexToken(NUMBER,2,3,21) ---- -The tokens returned by lexer.token() are instances -of LexToken. This object has -attributes tok.type, tok.value, -tok.lineno, and tok.lexpos. The following code shows an example of -accessing these attributes: - --for tok in lexer: - print tok ---- -The tok.type and tok.value attributes contain the -type and value of the token itself. -tok.line and tok.lexpos contain information about -the location of the token. tok.lexpos is the index of the -token relative to the start of the input text. - --# Tokenize -while True: - tok = lexer.token() - if not tok: break # No more input - print tok.type, tok.value, tok.line, tok.lexpos --4.2 The tokens list
- - -All lexers must provide a list tokens that defines all of the possible token -names that can be produced by the lexer. This list is always required -and is used to perform a variety of validation checks. The tokens list is also used by the -yacc.py module to identify terminals. - --In the example, the following code specified the token names: - -
-- --tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'LPAREN', - 'RPAREN', -) --4.3 Specification of tokens
-
-
-Each token is specified by writing a regular expression rule.  Each of these rules is
-defined by making declarations with a special prefix t_ to indicate that it
-defines a token.  For simple tokens, the regular expression can
-be specified as strings such as this (note: Python raw strings are used since they are the
-most convenient way to write regular expression strings):
-
-
-t_PLUS = r'\+'
-
-
-In this case, the name following the t_ must exactly match one of the
-names supplied in tokens.   If some kind of action needs to be performed,
-a token rule can be specified as a function.  For example, this rule matches numbers and
-converts the string into a Python integer.
-
-
-def t_NUMBER(t):
-    r'\d+'
-    t.value = int(t.value)
-    return t
-
-
-When a function is used, the regular expression rule is specified in the function documentation string.
-The function always takes a single argument which is an instance of
-LexToken.   This object has attributes of t.type which is the token type (as a string),
-t.value which is the lexeme (the actual text matched), t.lineno which is the current line number, and t.lexpos which
-is the position of the token relative to the beginning of the input text.
-By default, t.type is set to the name following the t_ prefix.  The action
-function can modify the contents of the LexToken object as appropriate.  However,
-when it is done, the resulting token should be returned.  If no value is returned by the action
-function, the token is simply discarded and the next token read.
-
-Internally, lex.py uses the re module to do its pattern matching.  When building the master regular expression,
-rules are added in the following order:
-
-
-
-- All tokens defined by functions are added in the same order as they appear in the lexer file. -
- Tokens defined by strings are added next by sorting them in order of decreasing regular expression length (longer expressions -are added first). -
-Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you -wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular -expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions, -the order can be explicitly controlled since rules appearing first are checked first. - -
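
To make the ordering concrete, here is a small sketch; the EQ and ASSIGN token names are only illustrative and would have to be added to the tokens list of whatever lexer uses them.

# Both rules are plain strings, so lex.py sorts them by decreasing regular
# expression length: '==' is tried before '=', and "a == b" is tokenized as
# ID EQ ID rather than ID ASSIGN ASSIGN ID.
t_EQ     = r'=='
t_ASSIGN = r'='

# Had the two patterns been written as functions instead, the function for
# '==' would simply need to appear before the one for '=' in the file.
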
-To handle reserved words, you should write a single rule to match an -identifier and do a special name lookup in a function like this: - -
-- -This approach greatly reduces the number of regular expression rules and is likely to make things a little faster. - --reserved = { - 'if' : 'IF', - 'then' : 'THEN', - 'else' : 'ELSE', - 'while' : 'WHILE', - ... -} - -tokens = ['LPAREN','RPAREN',...,'ID'] + list(reserved.values()) - -def t_ID(t): - r'[a-zA-Z_][a-zA-Z_0-9]*' - t.type = reserved.get(t.value,'ID') # Check for reserved words - return t ---Note: You should avoid writing individual rules for reserved words. For example, if you write rules like this, - -
-- -those rules will be triggered for identifiers that include those words as a prefix such as "forget" or "printed". This is probably not -what you want. - --t_FOR = r'for' -t_PRINT = r'print' --4.4 Token values
- - -When tokens are returned by lex, they have a value that is stored in the value attribute. Normally, the value is the text -that was matched. However, the value can be assigned to any Python object. For instance, when lexing identifiers, you may -want to return both the identifier name and information from some sort of symbol table. To do this, you might write a rule like this: - --- -It is important to note that storing data in other attribute names is not recommended. The yacc.py module only exposes the -contents of the value attribute. Thus, accessing other attributes may be unnecessarily awkward. If you -need to store multiple values on a token, assign a tuple, dictionary, or instance to value. - --def t_ID(t): - ... - # Look up symbol table information and return a tuple - t.value = (t.value, symbol_lookup(t.value)) - ... - return t --4.5 Discarded tokens
- - -To discard a token, such as a comment, simply define a token rule that returns no value. For example: - --- -Alternatively, you can include the prefix "ignore_" in the token declaration to force a token to be ignored. For example: - --def t_COMMENT(t): - r'\#.*' - pass - # No return value. Token discarded ---- -Be advised that if you are ignoring many different kinds of text, you may still want to use functions since these provide more precise -control over the order in which regular expressions are matched (i.e., functions are matched in order of specification whereas strings are -sorted by regular expression length). - --t_ignore_COMMENT = r'\#.*' --4.6 Line numbers and positional information
- - -By default, lex.py knows nothing about line numbers. This is because lex.py doesn't know anything -about what constitutes a "line" of input (e.g., the newline character or even if the input is textual data). -To update this information, you need to write a special rule. In the example, the t_newline() rule shows how to do this. - -
--Within the rule, the lineno attribute of the underlying lexer t.lexer is updated. -After the line number is updated, the token is simply discarded since nothing is returned. - --# Define a rule so we can track line numbers -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) ---lex.py does not perform and kind of automatic column tracking. However, it does record positional -information related to each token in the lexpos attribute. Using this, it is usually possible to compute -column information as a separate step. For instance, just count backwards until you reach a newline. - -
-- -Since column information is often only useful in the context of error handling, calculating the column -position can be performed when needed as opposed to doing it for each token. - --# Compute column. -# input is the input text string -# token is a token instance -def find_column(input,token): - last_cr = input.rfind('\n',0,token.lexpos) - if last_cr < 0: - last_cr = 0 - column = (token.lexpos - last_cr) + 1 - return column --4.7 Ignored characters
- - --The special t_ignore rule is reserved by lex.py for characters -that should be completely ignored in the input stream. -Usually this is used to skip over whitespace and other non-essential characters. -Although it is possible to define a regular expression rule for whitespace in a manner -similar to t_newline(), the use of t_ignore provides substantially better -lexing performance because it is handled as a special case and is checked in a much -more efficient manner than the normal regular expression rules. - -
4.8 Literal characters
- - --Literal characters can be specified by defining a variable literals in your lexing module. For example: - -
-- -or alternatively - --literals = [ '+','-','*','/' ] ---- -A literal character is simply a single character that is returned "as is" when encountered by the lexer. Literals are checked -after all of the defined regular expression rules. Thus, if a rule starts with one of the literal characters, it will always -take precedence. --literals = "+-*/" ---When a literal token is returned, both its type and value attributes are set to the character itself. For example, '+'. - -
4.9 Error handling
- - --Finally, the t_error() -function is used to handle lexing errors that occur when illegal -characters are detected. In this case, the t.value attribute contains the -rest of the input string that has not been tokenized. In the example, the error function -was defined as follows: - -
-- -In this case, we simply print the offending character and skip ahead one character by calling t.lexer.skip(1). - --# Error handling rule -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) --4.10 Building and using the lexer
- - --To build the lexer, the function lex.lex() is used. This function -uses Python reflection (or introspection) to read the the regular expression rules -out of the calling context and build the lexer. Once the lexer has been built, two methods can -be used to control the lexer. - -
-
- -The preferred way to use PLY is to invoke the above methods directly on the lexer object returned by the -lex() function. The legacy interface to PLY involves module-level functions lex.input() and lex.token(). -For example: - -- lexer.input(data). Reset the lexer and store a new input string. -
- lexer.token(). Return the next token. Returns a special LexToken instance on success or -None if the end of the input text has been reached. -
-- --lex.lex() -lex.input(sometext) -while 1: - tok = lex.token() - if not tok: break - print tok ---In this example, the module-level functions lex.input() and lex.token() are bound to the input() -and token() methods of the last lexer created by the lex module. This interface may go away at some point so -it's probably best not to use it. - -
4.11 The @TOKEN decorator
- - -In some applications, you may want to define build tokens from as a series of -more complex regular expression rules. For example: - --- -In this case, we want the regular expression rule for ID to be one of the variables above. However, there is no -way to directly specify this using a normal documentation string. To solve this problem, you can use the @TOKEN -decorator. For example: - --digit = r'([0-9])' -nondigit = r'([_A-Za-z])' -identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)' - -def t_ID(t): - # want docstring to be identifier above. ????? - ... ---- -This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. An alternative -approach this problem is to set the docstring directly like this: - --from ply.lex import TOKEN - -@TOKEN(identifier) -def t_ID(t): - ... ---- -NOTE: Use of @TOKEN requires Python-2.4 or newer. If you're concerned about backwards compatibility with older -versions of Python, use the alternative approach of setting the docstring directly. - --def t_ID(t): - ... - -t_ID.__doc__ = identifier --4.12 Optimized mode
- - -For improved performance, it may be desirable to use Python's -optimized mode (e.g., running Python with the -O -option). However, doing so causes Python to ignore documentation -strings. This presents special problems for lex.py. To -handle this case, you can create your lexer using -the optimize option as follows: - --- -Next, run Python in its normal operating mode. When you do -this, lex.py will write a file called lextab.py to -the current directory. This file contains all of the regular -expression rules and tables used during lexing. On subsequent -executions, -lextab.py will simply be imported to build the lexer. This -approach substantially improves the startup time of the lexer and it -works in Python's optimized mode. - --lexer = lex.lex(optimize=1) ---To change the name of the lexer-generated file, use the lextab keyword argument. For example: - -
-- -When running in optimized mode, it is important to note that lex disables most error checking. Thus, this is really only recommended -if you're sure everything is working correctly and you're ready to start releasing production code. - --lexer = lex.lex(optimize=1,lextab="footab") --4.13 Debugging
- - -For the purpose of debugging, you can run lex() in a debugging mode as follows: - --- --lexer = lex.lex(debug=1) ---This will produce various sorts of debugging information including all of the added rules, -the master regular expressions used by the lexer, and tokens generating during lexing. -
- --In addition, lex.py comes with a simple main function which -will either tokenize input read from standard input or from a file specified -on the command line. To use it, simply put this in your lexer: -
- --- -Please refer to the "Debugging" section near the end for some more advanced details -of debugging. - --if __name__ == '__main__': - lex.runmain() --4.14 Alternative specification of lexers
- - -As shown in the example, lexers are specified all within one Python module. If you want to -put token rules in a different module from the one in which you invoke lex(), use the -module keyword argument. - --For example, you might have a dedicated module that just contains -the token rules: - -
-- -Now, if you wanted to build a tokenizer from these rules from within a different module, you would do the following (shown for Python interactive mode): - --# module: tokrules.py -# This module just contains the lexing rules - -# List of token names. This is always required -tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'LPAREN', - 'RPAREN', -) - -# Regular expression rules for simple tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' - -# A regular expression rule with some action code -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Define a rule so we can track line numbers -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - -# A string containing ignored characters (spaces and tabs) -t_ignore = ' \t' - -# Error handling rule -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) ---- -The module option can also be used to define lexers from instances of a class. For example: - -->>> import tokrules ->>> lexer = lex.lex(module=tokrules) ->>> lexer.input("3 + 4") ->>> lexer.token() -LexToken(NUMBER,3,1,1,0) ->>> lexer.token() -LexToken(PLUS,'+',1,2) ->>> lexer.token() -LexToken(NUMBER,4,1,4) ->>> lexer.token() -None ->>> ---- - -When building a lexer from class, you should construct the lexer from -an instance of the class, not the class object itself. This is because -PLY only works properly if the lexer actions are defined by bound-methods. - --import ply.lex as lex - -class MyLexer: - # List of token names. This is always required - tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'LPAREN', - 'RPAREN', - ) - - # Regular expression rules for simple tokens - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - - # A regular expression rule with some action code - # Note addition of self parameter since we're in a class - def t_NUMBER(self,t): - r'\d+' - t.value = int(t.value) - return t - - # Define a rule so we can track line numbers - def t_newline(self,t): - r'\n+' - t.lexer.lineno += len(t.value) - - # A string containing ignored characters (spaces and tabs) - t_ignore = ' \t' - - # Error handling rule - def t_error(self,t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - # Build the lexer - def build(self,**kwargs): - self.lexer = lex.lex(module=self, **kwargs) - - # Test it output - def test(self,data): - self.lexer.input(data) - while True: - tok = lexer.token() - if not tok: break - print tok - -# Build the lexer and try it out -m = MyLexer() -m.build() # Build the lexer -m.test("3 + 4") # Test it ---When using the module option to lex(), PLY collects symbols -from the underlying object using the dir() function. There is no -direct access to the __dict__ attribute of the object supplied as a -module value. - -
-Finally, if you want to keep things nicely encapsulated, but don't want to use a -full-fledged class definition, lexers can be defined using closures. For example: - -
-- - --import ply.lex as lex - -# List of token names. This is always required -tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'LPAREN', - 'RPAREN', -) - -def MyLexer(): - # Regular expression rules for simple tokens - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - - # A regular expression rule with some action code - def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - - # Define a rule so we can track line numbers - def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - - # A string containing ignored characters (spaces and tabs) - t_ignore = ' \t' - - # Error handling rule - def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - # Build the lexer from my environment and return it - return lex.lex() --4.15 Maintaining state
- - -In your lexer, you may want to maintain a variety of state -information. This might include mode settings, symbol tables, and -other details. As an example, suppose that you wanted to keep -track of how many NUMBER tokens had been encountered. - --One way to do this is to keep a set of global variables in the module -where you created the lexer. For example: - -
-- -If you don't like the use of a global variable, another place to store -information is inside the Lexer object created by lex(). -To this, you can use the lexer attribute of tokens passed to -the various rules. For example: - --num_count = 0 -def t_NUMBER(t): - r'\d+' - global num_count - num_count += 1 - t.value = int(t.value) - return t ---- -This latter approach has the advantage of being simple and working -correctly in applications where multiple instantiations of a given -lexer exist in the same application. However, this might also feel -like a gross violation of encapsulation to OO purists. -Just to put your mind at some ease, all -internal attributes of the lexer (with the exception of lineno) have names that are prefixed -by lex (e.g., lexdata,lexpos, etc.). Thus, -it is perfectly safe to store attributes in the lexer that -don't have names starting with that prefix or a name that conlicts with one of the -predefined methods (e.g., input(), token(), etc.). - --def t_NUMBER(t): - r'\d+' - t.lexer.num_count += 1 # Note use of lexer attribute - t.value = int(t.value) - return t - -lexer = lex.lex() -lexer.num_count = 0 # Set the initial count ---If you don't like assigning values on the lexer object, you can define your lexer as a class as -shown in the previous section: - -
-- -The class approach may be the easiest to manage if your application is -going to be creating multiple instances of the same lexer and you need -to manage a lot of state. - --class MyLexer: - ... - def t_NUMBER(self,t): - r'\d+' - self.num_count += 1 - t.value = int(t.value) - return t - - def build(self, **kwargs): - self.lexer = lex.lex(object=self,**kwargs) - - def __init__(self): - self.num_count = 0 ---State can also be managed through closures. For example, in Python 3: - -
-- --def MyLexer(): - num_count = 0 - ... - def t_NUMBER(t): - r'\d+' - nonlocal num_count - num_count += 1 - t.value = int(t.value) - return t - ... --4.16 Lexer cloning
- - --If necessary, a lexer object can be duplicated by invoking its clone() method. For example: - -
-- -When a lexer is cloned, the copy is exactly identical to the original lexer -including any input text and internal state. However, the clone allows a -different set of input text to be supplied which may be processed separately. -This may be useful in situations when you are writing a parser/compiler that -involves recursive or reentrant processing. For instance, if you -needed to scan ahead in the input for some reason, you could create a -clone and use it to look ahead. Or, if you were implementing some kind of preprocessor, -cloned lexers could be used to handle different input files. - --lexer = lex.lex() -... -newlexer = lexer.clone() ---Creating a clone is different than calling lex.lex() in that -PLY doesn't regenerate any of the internal tables or regular expressions. So, - -
-Special considerations need to be made when cloning lexers that also -maintain their own internal state using classes or closures. Namely, -you need to be aware that the newly created lexers will share all of -this state with the original lexer. For example, if you defined a -lexer as a class and did this: - -
-- -Then both a and b are going to be bound to the same -object m and any changes to m will be reflected in both lexers. It's -important to emphasize that clone() is only meant to create a new lexer -that reuses the regular expressions and environment of another lexer. If you -need to make a totally new copy of a lexer, then call lex() again. - --m = MyLexer() -a = lex.lex(object=m) # Create a lexer - -b = a.clone() # Clone the lexer --4.17 Internal lexer state
- - -A Lexer object lexer has a number of internal attributes that may be useful in certain -situations. - --lexer.lexpos -
-This attribute is an integer that contains the current position within the input text. If you modify -the value, it will change the result of the next call to token(). Within token rule functions, this points -to the first character after the matched text. If the value is modified within a rule, the next returned token will be -matched at the new position. -- --lexer.lineno -
-The current value of the line number attribute stored in the lexer. PLY only specifies that the attribute -exists---it never sets, updates, or performs any processing with it. If you want to track line numbers, -you will need to add code yourself (see the section on line numbers and positional information). -- --lexer.lexdata -
-The current input text stored in the lexer. This is the string passed with the input() method. It -would probably be a bad idea to modify this unless you really know what you're doing. -- --lexer.lexmatch -
-This is the raw Match object returned by the Python re.match() function (used internally by PLY) for the -current token. If you have written a regular expression that contains named groups, you can use this to retrieve those values. -Note: This attribute is only updated when tokens are defined and processed by functions. -- -4.18 Conditional lexing and start conditions
- - -In advanced parsing applications, it may be useful to have different -lexing states. For instance, you may want the occurrence of a certain -token or syntactic construct to trigger a different kind of lexing. -PLY supports a feature that allows the underlying lexer to be put into -a series of different states. Each state can have its own tokens, -lexing rules, and so forth. The implementation is based largely on -the "start condition" feature of GNU flex. Details of this can be found -at http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html.. - --To define a new lexing state, it must first be declared. This is done by including a "states" declaration in your -lex file. For example: - -
-- -This declaration declares two states, 'foo' -and 'bar'. States may be of two types; 'exclusive' -and 'inclusive'. An exclusive state completely overrides the -default behavior of the lexer. That is, lex will only return tokens -and apply rules defined specifically for that state. An inclusive -state adds additional tokens and rules to the default set of rules. -Thus, lex will return both the tokens defined by default in addition -to those defined for the inclusive state. - --states = ( - ('foo','exclusive'), - ('bar','inclusive'), -) ---Once a state has been declared, tokens and rules are declared by including the -state name in token/rule declaration. For example: - -
-- -A token can be declared in multiple states by including multiple state names in the declaration. For example: - --t_foo_NUMBER = r'\d+' # Token 'NUMBER' in state 'foo' -t_bar_ID = r'[a-zA-Z_][a-zA-Z0-9_]*' # Token 'ID' in state 'bar' - -def t_foo_newline(t): - r'\n' - t.lexer.lineno += 1 ---- -Alternative, a token can be declared in all states using the 'ANY' in the name. - --t_foo_bar_NUMBER = r'\d+' # Defines token 'NUMBER' in both state 'foo' and 'bar' ---- -If no state name is supplied, as is normally the case, the token is associated with a special state 'INITIAL'. For example, -these two declarations are identical: - --t_ANY_NUMBER = r'\d+' # Defines a token 'NUMBER' in all states ---- --t_NUMBER = r'\d+' -t_INITIAL_NUMBER = r'\d+' ---States are also associated with the special t_ignore and t_error() declarations. For example, if a state treats -these differently, you can declare: - -
-- -By default, lexing operates in the 'INITIAL' state. This state includes all of the normally defined tokens. -For users who aren't using different states, this fact is completely transparent. If, during lexing or parsing, you want to change -the lexing state, use the begin() method. For example: - --t_foo_ignore = " \t\n" # Ignored characters for state 'foo' - -def t_bar_error(t): # Special error handler for state 'bar' - pass ---- -To get out of a state, you use begin() to switch back to the initial state. For example: - --def t_begin_foo(t): - r'start_foo' - t.lexer.begin('foo') # Starts 'foo' state ---- -The management of states can also be done with a stack. For example: - --def t_foo_end(t): - r'end_foo' - t.lexer.begin('INITIAL') # Back to the initial state ---- --def t_begin_foo(t): - r'start_foo' - t.lexer.push_state('foo') # Starts 'foo' state - -def t_foo_end(t): - r'end_foo' - t.lexer.pop_state() # Back to the previous state ---The use of a stack would be useful in situations where there are many ways of entering a new lexing state and you merely want to go back -to the previous state afterwards. - -
-An example might help clarify. Suppose you were writing a parser and you wanted to grab sections of arbitrary C code enclosed by -curly braces. That is, whenever you encounter a starting brace '{', you want to read all of the enclosed code up to the ending brace '}' -and return it as a string. Doing this with a normal regular expression rule is nearly (if not actually) impossible. This is because braces can -be nested and can be included in comments and strings. Thus, simply matching up to the first matching '}' character isn't good enough. Here is how -you might use lexer states to do this: - -
-- -In this example, the occurrence of the first '{' causes the lexer to record the starting position and enter a new state 'ccode'. A collection of rules then match -various parts of the input that follow (comments, strings, etc.). All of these rules merely discard the token (by not returning a value). -However, if the closing right brace is encountered, the rule t_ccode_rbrace collects all of the code (using the earlier recorded starting -position), stores it, and returns a token 'CCODE' containing all of that text. When returning the token, the lexing state is restored back to its -initial state. - --# Declare the state -states = ( - ('ccode','exclusive'), -) - -# Match the first {. Enter ccode state. -def t_ccode(t): - r'\{' - t.lexer.code_start = t.lexer.lexpos # Record the starting position - t.lexer.level = 1 # Initial brace level - t.lexer.begin('ccode') # Enter 'ccode' state - -# Rules for the ccode state -def t_ccode_lbrace(t): - r'\{' - t.lexer.level +=1 - -def t_ccode_rbrace(t): - r'\}' - t.lexer.level -=1 - - # If closing brace, return the code fragment - if t.lexer.level == 0: - t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos+1] - t.type = "CCODE" - t.lexer.lineno += t.value.count('\n') - t.lexer.begin('INITIAL') - return t - -# C or C++ comment (ignore) -def t_ccode_comment(t): - r'(/\*(.|\n)*?*/)|(//.*)' - pass - -# C string -def t_ccode_string(t): - r'\"([^\\\n]|(\\.))*?\"' - -# C character literal -def t_ccode_char(t): - r'\'([^\\\n]|(\\.))*?\'' - -# Any sequence of non-whitespace characters (not braces, strings) -def t_ccode_nonspace(t): - r'[^\s\{\}\'\"]+' - -# Ignored characters (whitespace) -t_ccode_ignore = " \t\n" - -# For bad characters, we just skip over it -def t_ccode_error(t): - t.lexer.skip(1) --4.19 Miscellaneous Issues
- - --
The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this -rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data -such as open files or sockets. This limitation is primarily a side-effect of using the re module. - - -
The lexer should work properly with both Unicode strings given as token and pattern matching rules as -well as for input text. - - -
If you need to supply optional flags to the re.compile() function, use the reflags option to lex. For example: - - -- --lex.lex(reflags=re.UNICODE) ---
Since the lexer is written entirely in Python, its performance is
-largely determined by that of the Python re module.  Although
-the lexer has been written to be as efficient as possible, it's not
-blazingly fast when used on very large input files.  If
-performance is a concern, you might consider upgrading to the most
-recent version of Python, creating a hand-written lexer, or offloading
-the lexer into a C extension module.
-
-
-If you are going to create a hand-written lexer and you plan to use it with yacc.py,
-it only needs to conform to the following requirements (a minimal sketch follows the list):
-
-
- -- It must provide a token() method that returns the next token or None if no more -tokens are available. -
- The token() method must return an object tok that has type and value attributes. If -line number tracking is being used, then the token should also define a lineno attribute. -
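
As a minimal sketch of those two requirements, a hand-written lexer can be as small as the classes below; the Token class, the whitespace-splitting logic, and the NUMBER/ID token names are purely illustrative.

class Token(object):
    def __init__(self, type, value, lineno):
        self.type = type            # required by yacc.py
        self.value = value          # required by yacc.py
        self.lineno = lineno        # only needed if line tracking is used

class HandWrittenLexer(object):
    # A deliberately tiny scanner that only understands whitespace-separated
    # chunks: digits become NUMBER tokens, everything else becomes ID tokens.
    def __init__(self):
        self.toks = iter(())
        self.lineno = 1

    def input(self, data):
        self.toks = iter(data.split())

    def token(self):
        try:
            text = next(self.toks)
        except StopIteration:
            return None             # no more input
        if text.isdigit():
            return Token('NUMBER', int(text), self.lineno)
        return Token('ID', text, self.lineno)

# An instance of HandWrittenLexer can then be handed to a yacc.py parser
# (for example through the lexer argument of its parse() method), provided
# the grammar uses NUMBER and ID as its terminals.
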
5. Parsing basics
- - -yacc.py is used to parse language syntax. Before showing an -example, there are a few important bits of background that must be -mentioned. First, syntax is usually specified in terms of a BNF grammar. -For example, if you wanted to parse -simple arithmetic expressions, you might first write an unambiguous -grammar specification like this: - --- -In the grammar, symbols such as NUMBER, +, -, *, and / are known -as terminals and correspond to raw input tokens. Identifiers such as term and factor refer to -grammar rules comprised of a collection of terminals and other rules. These identifiers are known as non-terminals. --expression : expression + term - | expression - term - | term - -term : term * factor - | term / factor - | factor - -factor : NUMBER - | ( expression ) --- -The semantic behavior of a language is often specified using a -technique known as syntax directed translation. In syntax directed -translation, attributes are attached to each symbol in a given grammar -rule along with an action. Whenever a particular grammar rule is -recognized, the action describes what to do. For example, given the -expression grammar above, you might write the specification for a -simple calculator like this: - -
-- -A good way to think about syntax directed translation is to -view each symbol in the grammar as a kind of object. Associated -with each symbol is a value representing its "state" (for example, the -val attribute above). Semantic -actions are then expressed as a collection of functions or methods -that operate on the symbols and associated values. - --Grammar Action --------------------------------- -------------------------------------------- -expression0 : expression1 + term expression0.val = expression1.val + term.val - | expression1 - term expression0.val = expression1.val - term.val - | term expression0.val = term.val - -term0 : term1 * factor term0.val = term1.val * factor.val - | term1 / factor term0.val = term1.val / factor.val - | factor term0.val = factor.val - -factor : NUMBER factor.val = int(NUMBER.lexval) - | ( expression ) factor.val = expression.val ---Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a -bottom up technique that tries to recognize the right-hand-side of various grammar rules. -Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the -grammar symbols are replaced by the grammar symbol on the left-hand-side. - -
-LR parsing is commonly implemented by shifting grammar symbols onto a -stack and looking at the stack and the next input token for patterns that -match one of the grammar rules. -The details of the algorithm can be found in a compiler textbook, but the -following example illustrates the steps that are performed if you -wanted to parse the expression -3 + 5 * (10 - 20) using the grammar defined above. In the example, -the special symbol $ represents the end of input. - - -
-- -When parsing the expression, an underlying state machine and the -current input token determine what happens next. If the next token -looks like part of a valid grammar rule (based on other items on the -stack), it is generally shifted onto the stack. If the top of the -stack contains a valid right-hand-side of a grammar rule, it is -usually "reduced" and the symbols replaced with the symbol on the -left-hand-side. When this reduction occurs, the appropriate action is -triggered (if defined). If the input token can't be shifted and the -top of stack doesn't match any grammar rules, a syntax error has -occurred and the parser must take some kind of recovery step (or bail -out). A parse is only successful if the parser reaches a state where -the symbol stack is empty and there are no more input tokens. - --Step Symbol Stack Input Tokens Action ----- --------------------- --------------------- ------------------------------- -1 3 + 5 * ( 10 - 20 )$ Shift 3 -2 3 + 5 * ( 10 - 20 )$ Reduce factor : NUMBER -3 factor + 5 * ( 10 - 20 )$ Reduce term : factor -4 term + 5 * ( 10 - 20 )$ Reduce expr : term -5 expr + 5 * ( 10 - 20 )$ Shift + -6 expr + 5 * ( 10 - 20 )$ Shift 5 -7 expr + 5 * ( 10 - 20 )$ Reduce factor : NUMBER -8 expr + factor * ( 10 - 20 )$ Reduce term : factor -9 expr + term * ( 10 - 20 )$ Shift * -10 expr + term * ( 10 - 20 )$ Shift ( -11 expr + term * ( 10 - 20 )$ Shift 10 -12 expr + term * ( 10 - 20 )$ Reduce factor : NUMBER -13 expr + term * ( factor - 20 )$ Reduce term : factor -14 expr + term * ( term - 20 )$ Reduce expr : term -15 expr + term * ( expr - 20 )$ Shift - -16 expr + term * ( expr - 20 )$ Shift 20 -17 expr + term * ( expr - 20 )$ Reduce factor : NUMBER -18 expr + term * ( expr - factor )$ Reduce term : factor -19 expr + term * ( expr - term )$ Reduce expr : expr - term -20 expr + term * ( expr )$ Shift ) -21 expr + term * ( expr ) $ Reduce factor : (expr) -22 expr + term * factor $ Reduce term : term * factor -23 expr + term $ Reduce expr : expr + term -24 expr $ Reduce expr -25 $ Success! ---It is important to note that the underlying implementation is built -around a large finite-state machine that is encoded in a collection of -tables. The construction of these tables is non-trivial and -beyond the scope of this discussion. However, subtle details of this -process explain why, in the example above, the parser chooses to shift -a token onto the stack in step 9 rather than reducing the -rule expr : expr + term. - -
6. Yacc
- - -The ply.yacc module implements the parsing component of PLY. -The name "yacc" stands for "Yet Another Compiler Compiler" and is -borrowed from the Unix tool of the same name. - -6.1 An example
- - -Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is -how you would do it with yacc.py: - --- -In this example, each grammar rule is defined by a Python function -where the docstring to that function contains the appropriate -context-free grammar specification. The statements that make up the -function body implement the semantic actions of the rule. Each function -accepts a single argument p that is a sequence containing the -values of each grammar symbol in the corresponding rule. The values -of p[i] are mapped to grammar symbols as shown here: - --# Yacc example - -import ply.yacc as yacc - -# Get the token map from the lexer. This is required. -from calclex import tokens - -def p_expression_plus(p): - 'expression : expression PLUS term' - p[0] = p[1] + p[3] - -def p_expression_minus(p): - 'expression : expression MINUS term' - p[0] = p[1] - p[3] - -def p_expression_term(p): - 'expression : term' - p[0] = p[1] - -def p_term_times(p): - 'term : term TIMES factor' - p[0] = p[1] * p[3] - -def p_term_div(p): - 'term : term DIVIDE factor' - p[0] = p[1] / p[3] - -def p_term_factor(p): - 'term : factor' - p[0] = p[1] - -def p_factor_num(p): - 'factor : NUMBER' - p[0] = p[1] - -def p_factor_expr(p): - 'factor : LPAREN expression RPAREN' - p[0] = p[2] - -# Error rule for syntax errors -def p_error(p): - print "Syntax error in input!" - -# Build the parser -parser = yacc.yacc() - -while True: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - result = parser.parse(s) - print result ---- --def p_expression_plus(p): - 'expression : expression PLUS term' - # ^ ^ ^ ^ - # p[0] p[1] p[2] p[3] - - p[0] = p[1] + p[3] ---For tokens, the "value" of the corresponding p[i] is the -same as the p.value attribute assigned in the lexer -module. For non-terminals, the value is determined by whatever is -placed in p[0] when rules are reduced. This value can be -anything at all. However, it probably most common for the value to be -a simple Python type, a tuple, or an instance. In this example, we -are relying on the fact that the NUMBER token stores an -integer value in its value field. All of the other rules simply -perform various types of integer operations and propagate the result. -
- --Note: The use of negative indices have a special meaning in -yacc---specially p[-1] does not have the same value -as p[3] in this example. Please see the section on "Embedded -Actions" for further details. -
- --The first rule defined in the yacc specification determines the -starting grammar symbol (in this case, a rule for expression -appears first). Whenever the starting rule is reduced by the parser -and no more input is available, parsing stops and the final value is -returned (this value will be whatever the top-most rule placed -in p[0]). Note: an alternative starting symbol can be -specified using the start keyword argument to -yacc(). - -
The p_error(p) rule is defined to catch syntax errors. -See the error handling section below for more detail. - -
-To build the parser, call the yacc.yacc() function. This -function looks at the module and attempts to construct all of the LR -parsing tables for the grammar you have specified. The first -time yacc.yacc() is invoked, you will get a message such as -this: - -
-- -Since table construction is relatively expensive (especially for large -grammars), the resulting parsing table is written to the current -directory in a file called parsetab.py. In addition, a -debugging file called parser.out is created. On subsequent -executions, yacc will reload the table from -parsetab.py unless it has detected a change in the underlying -grammar (in which case the tables and parsetab.py file are -regenerated). Note: The names of parser output files can be changed -if necessary. See the PLY Reference for details. - --$ python calcparse.py -Generating LALR tables -calc > ---If any errors are detected in your grammar specification, yacc.py will produce -diagnostic messages and possibly raise an exception. Some of the errors that can be detected include: - -
-
- -The next few sections discuss grammar specification in more detail. - -- Duplicated function names (if more than one rule function have the same name in the grammar file). -
- Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars. -
- Badly specified grammar rules. -
- Infinite recursion (rules that can never terminate). -
- Unused rules and tokens -
- Undefined rules and tokens -
-The final part of the example shows how to actually run the parser -created by -yacc(). To run the parser, you simply have to call -the parse() with a string of input text. This will run all -of the grammar rules and return the result of the entire parse. This -result return is the value assigned to p[0] in the starting -grammar rule. - -
6.2 Combining Grammar Rule Functions
- - -When grammar rules are similar, they can be combined into a single function. -For example, consider the two rules in our earlier example: - --- -Instead of writing two functions, you might write a single function like this: - --def p_expression_plus(p): - 'expression : expression PLUS term' - p[0] = p[1] + p[3] - -def p_expression_minus(t): - 'expression : expression MINUS term' - p[0] = p[1] - p[3] ---- -In general, the doc string for any given function can contain multiple grammar rules. So, it would -have also been legal (although possibly confusing) to write this: - --def p_expression(p): - '''expression : expression PLUS term - | expression MINUS term''' - if p[2] == '+': - p[0] = p[1] + p[3] - elif p[2] == '-': - p[0] = p[1] - p[3] ---- -When combining grammar rules into a single function, it is usually a good idea for all of the rules to have -a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more -complicated than necessary. However, it is possible to handle simple cases using len(). For example: - --def p_binary_operators(p): - '''expression : expression PLUS term - | expression MINUS term - term : term TIMES factor - | term DIVIDE factor''' - if p[2] == '+': - p[0] = p[1] + p[3] - elif p[2] == '-': - p[0] = p[1] - p[3] - elif p[2] == '*': - p[0] = p[1] * p[3] - elif p[2] == '/': - p[0] = p[1] / p[3] ---- -If parsing performance is a concern, you should resist the urge to put -too much conditional processing into a single grammar rule as shown in -these examples. When you add checks to see which grammar rule is -being handled, you are actually duplicating the work that the parser -has already performed (i.e., the parser already knows exactly what rule it -matched). You can eliminate this overhead by using a -separate p_rule() function for each grammar rule. - --def p_expressions(p): - '''expression : expression MINUS expression - | MINUS expression''' - if (len(p) == 4): - p[0] = p[1] - p[3] - elif (len(p) == 3): - p[0] = -p[2] --6.3 Character Literals
- - -If desired, a grammar may contain tokens defined as single character literals. For example: - --- -A character literal must be enclosed in quotes such as '+'. In addition, if literals are used, they must be declared in the -corresponding lex file through the use of a special literals declaration. - --def p_binary_operators(p): - '''expression : expression '+' term - | expression '-' term - term : term '*' factor - | term '/' factor''' - if p[2] == '+': - p[0] = p[1] + p[3] - elif p[2] == '-': - p[0] = p[1] - p[3] - elif p[2] == '*': - p[0] = p[1] * p[3] - elif p[2] == '/': - p[0] = p[1] / p[3] ---- -Character literals are limited to a single character. Thus, it is not legal to specify literals such as '<=' or '=='. For this, use -the normal lexing rules (e.g., define a rule such as t_EQ = r'=='). - --# Literals. Should be placed in module given to lex() -literals = ['+','-','*','/' ] --6.4 Empty Productions
- - -yacc.py can handle empty productions by defining a rule like this: - --- -Now to use the empty production, simply use 'empty' as a symbol. For example: - --def p_empty(p): - 'empty :' - pass ---- -Note: You can write empty rules anywhere by simply specifying an empty -right hand side. However, I personally find that writing an "empty" -rule and using "empty" to denote an empty production is easier to read -and more clearly states your intentions. - --def p_optitem(p): - 'optitem : item' - ' | empty' - ... --6.5 Changing the starting symbol
- - -Normally, the first rule found in a yacc specification defines the starting grammar rule (top level rule). To change this, simply -supply a start specifier in your file. For example: - --- -The use of a start specifier may be useful during debugging -since you can use it to have yacc build a subset of a larger grammar. -For this purpose, it is also possible to specify a starting symbol as -an argument to yacc(). For example: - --start = 'foo' - -def p_bar(p): - 'bar : A B' - -# This is the starting rule due to the start specifier above -def p_foo(p): - 'foo : bar X' -... ---- --yacc.yacc(start='foo') --6.6 Dealing With Ambiguous Grammars
- - -The expression grammar given in the earlier example has been written -in a special format to eliminate ambiguity. However, in many -situations, it is extremely difficult or awkward to write grammars in -this format. A much more natural way to express the grammar is in a -more compact form like this: - --- -Unfortunately, this grammar specification is ambiguous. For example, -if you are parsing the string "3 * 4 + 5", there is no way to tell how -the operators are supposed to be grouped. For example, does the -expression mean "(3 * 4) + 5" or is it "3 * (4+5)"? - --expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | LPAREN expression RPAREN - | NUMBER ---When an ambiguous grammar is given to yacc.py it will print -messages about "shift/reduce conflicts" or "reduce/reduce conflicts". -A shift/reduce conflict is caused when the parser generator can't -decide whether or not to reduce a rule or shift a symbol on the -parsing stack. For example, consider the string "3 * 4 + 5" and the -internal parsing stack: - -
-- -In this case, when the parser reaches step 6, it has two options. One -is to reduce the rule expr : expr * expr on the stack. The -other option is to shift the token + on the stack. Both -options are perfectly legal from the rules of the -context-free-grammar. - --Step Symbol Stack Input Tokens Action ----- --------------------- --------------------- ------------------------------- -1 $ 3 * 4 + 5$ Shift 3 -2 $ 3 * 4 + 5$ Reduce : expression : NUMBER -3 $ expr * 4 + 5$ Shift * -4 $ expr * 4 + 5$ Shift 4 -5 $ expr * 4 + 5$ Reduce: expression : NUMBER -6 $ expr * expr + 5$ SHIFT/REDUCE CONFLICT ???? ---By default, all shift/reduce conflicts are resolved in favor of -shifting. Therefore, in the above example, the parser will always -shift the + instead of reducing. Although this strategy -works in many cases (for example, the case of -"if-then" versus "if-then-else"), it is not enough for arithmetic expressions. In fact, -in the above example, the decision to shift + is completely -wrong---we should have reduced expr * expr since -multiplication has higher mathematical precedence than addition. - -
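To see why shifting is usually a reasonable default, consider the classic "if-then" versus "if-then-else" case mentioned above. The sketch below is not part of the calculator example; it assumes tokens named IF, THEN, and ELSE. When an ELSE is seen, the parser has a shift/reduce conflict, and shifting pairs the ELSE with the nearest unmatched IF, which is normally what you want:

def p_statement_if(p):
    'statement : IF expression THEN statement'
    # Reducing here would close the IF before a following ELSE is considered
    p[0] = ('if', p[2], p[4])

def p_statement_if_else(p):
    'statement : IF expression THEN statement ELSE statement'
    # Shifting ELSE (the default) attaches it to the closest IF
    p[0] = ('if-else', p[2], p[4], p[6])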
To resolve ambiguity, especially in expression -grammars, yacc.py allows individual tokens to be assigned a -precedence level and associativity. This is done by adding a variable -precedence to the grammar file like this: - -
-- -This declaration specifies that PLUS/MINUS have the -same precedence level and are left-associative and that -TIMES/DIVIDE have the same precedence and are -left-associative. Within the precedence declaration, tokens -are ordered from lowest to highest precedence. Thus, this declaration -specifies that TIMES/DIVIDE have higher precedence -than PLUS/MINUS (since they appear later in the -precedence specification). - --precedence = ( - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), -) ---The precedence specification works by associating a numerical -precedence level value and associativity direction to the listed -tokens. For example, in the above example you get: - -
-- -These values are then used to attach a numerical precedence value and -associativity direction to each grammar rule. This is always -determined by looking at the precedence of the right-most terminal -symbol. For example: - --PLUS : level = 1, assoc = 'left' -MINUS : level = 1, assoc = 'left' -TIMES : level = 2, assoc = 'left' -DIVIDE : level = 2, assoc = 'left' ---- -When shift/reduce conflicts are encountered, the parser generator resolves the conflict by -looking at the precedence rules and associativity specifiers. - --expression : expression PLUS expression # level = 1, left - | expression MINUS expression # level = 1, left - | expression TIMES expression # level = 2, left - | expression DIVIDE expression # level = 2, left - | LPAREN expression RPAREN # level = None (not specified) - | NUMBER # level = None (not specified) ---
-
- -For example, if "expression PLUS expression" has been parsed and the -next token is "TIMES", the action is going to be a shift because -"TIMES" has a higher precedence level than "PLUS". On the other hand, -if "expression TIMES expression" has been parsed and the next token is -"PLUS", the action is going to be reduce because "PLUS" has a lower -precedence than "TIMES." - -- If the current token has higher precedence than the rule on the stack, it is shifted. -
- If the grammar rule on the stack has higher precedence, the rule is reduced. -
- If the current token and the grammar rule have the same precedence, the -rule is reduced for left associativity, whereas the token is shifted for right associativity. -
- If nothing is known about the precedence, shift/reduce conflicts are resolved in -favor of shifting (the default). -
-When shift/reduce conflicts are resolved using the first three -techniques (with the help of precedence rules), yacc.py will -report no errors or conflicts in the grammar (although it will print -some information in the parser.out debugging file). - -
-One problem with the precedence specifier technique is that it is -sometimes necessary to change the precedence of an operator in certain -contexts. For example, consider a unary-minus operator in "3 + 4 * --5". Mathematically, the unary minus is normally given a very high -precedence--being evaluated before the multiply. However, in our -precedence specifier, MINUS has a lower precedence than TIMES. To -deal with this, precedence rules can be given for so-called "fictitious tokens" -like this: - -
-
-Now, in the grammar file, we can write our unary minus rule like this:
-
-precedence = (
-    ('left', 'PLUS', 'MINUS'),
-    ('left', 'TIMES', 'DIVIDE'),
-    ('right', 'UMINUS'),            # Unary minus operator
-)
-
-
-In this case, %prec UMINUS overrides the default rule precedence--setting it to that
-of UMINUS in the precedence specifier.
-
-def p_expr_uminus(p):
-    'expression : MINUS expression %prec UMINUS'
-    p[0] = -p[2]
-
-
-At first, the use of UMINUS in this example may appear very confusing.
-UMINUS is not an input token or a grammar rule. Instead, you should
-think of it as the name of a special marker in the precedence table. When you use the %prec qualifier, you're simply
-telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence.
-
-It is also possible to specify non-associativity in the precedence table. This would -be used when you don't want operations to chain together. For example, suppose -you wanted to support comparison operators like < and > but you didn't want to allow -combinations like a < b < c. To do this, simply specify a rule like this: - -
-- --precedence = ( - ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator -) ---If you do this, the occurrence of input text such as a < b < c will result in a syntax error. However, simple -expressions such as a < b will still be fine. - -
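As an illustration, a grammar rule that uses these nonassociative comparison tokens might look like the sketch below (the token names match the precedence table above; the rule itself is not from the original example). Because LESSTHAN and GREATERTHAN are declared 'nonassoc', input such as a < b < c is rejected with a syntax error rather than being grouped:

def p_expression_compare(p):
    '''expression : expression LESSTHAN expression
                  | expression GREATERTHAN expression'''
    # Build a simple tuple; chained comparisons never reach this action
    p[0] = ('compare', p[2], p[1], p[3])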
-Reduce/reduce conflicts are caused when there are multiple grammar -rules that can be applied to a given set of symbols. This kind of -conflict is almost always bad and is always resolved by picking the -rule that appears first in the grammar file. Reduce/reduce conflicts -are almost always caused when different sets of grammar rules somehow -generate the same set of symbols. For example: - -
-
-In this case, a reduce/reduce conflict exists between these two rules:
-
-assignment : ID EQUALS NUMBER
-           | ID EQUALS expression
-
-expression : expression PLUS expression
-           | expression MINUS expression
-           | expression TIMES expression
-           | expression DIVIDE expression
-           | LPAREN expression RPAREN
-           | NUMBER
-
-
-For example, if you wrote "a = 5", the parser can't figure out if this
-is supposed to be reduced as assignment : ID EQUALS NUMBER or
-whether it's supposed to reduce the 5 as an expression and then reduce
-the rule assignment : ID EQUALS expression.
-
-assignment : ID EQUALS NUMBER
-expression : NUMBER
-
-
-It should be noted that reduce/reduce conflicts are notoriously
-difficult to spot simply by looking at the input grammar. When a
-reduce/reduce conflict occurs, yacc() will try to help by
-printing a warning message such as this:
-
-- -This message identifies the two rules that are in conflict. However, -it may not tell you how the parser arrived at such a state. To try -and figure it out, you'll probably have to look at your grammar and -the contents of the -parser.out debugging file with an appropriately high level of -caffeination. - --WARNING: 1 reduce/reduce conflict -WARNING: reduce/reduce conflict in state 15 resolved using rule (assignment -> ID EQUALS NUMBER) -WARNING: rejected rule (expression -> NUMBER) --6.7 The parser.out file
- - -Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR -parsing algorithm. To assist in debugging, yacc.py creates a debugging file called -'parser.out' when it generates the parsing table. The contents of this file look like the following: - --- -The different states that appear in this file are a representation of -every possible sequence of valid input tokens allowed by the grammar. -When receiving input tokens, the parser is building up a stack and -looking for matching rules. Each state keeps track of the grammar -rules that might be in the process of being matched at that point. Within each -rule, the "." character indicates the current location of the parse -within that rule. In addition, the actions for each valid input token -are listed. When a shift/reduce or reduce/reduce conflict arises, -rules not selected are prefixed with an !. For example: - --Unused terminals: - - -Grammar - -Rule 1 expression -> expression PLUS expression -Rule 2 expression -> expression MINUS expression -Rule 3 expression -> expression TIMES expression -Rule 4 expression -> expression DIVIDE expression -Rule 5 expression -> NUMBER -Rule 6 expression -> LPAREN expression RPAREN - -Terminals, with rules where they appear - -TIMES : 3 -error : -MINUS : 2 -RPAREN : 6 -LPAREN : 6 -DIVIDE : 4 -PLUS : 1 -NUMBER : 5 - -Nonterminals, with rules where they appear - -expression : 1 1 2 2 3 3 4 4 6 0 - - -Parsing method: LALR - - -state 0 - - S' -> . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 1 - - S' -> expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - PLUS shift and go to state 6 - MINUS shift and go to state 5 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - -state 2 - - expression -> LPAREN . expression RPAREN - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 3 - - expression -> NUMBER . - - $ reduce using rule 5 - PLUS reduce using rule 5 - MINUS reduce using rule 5 - TIMES reduce using rule 5 - DIVIDE reduce using rule 5 - RPAREN reduce using rule 5 - - -state 4 - - expression -> expression TIMES . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 5 - - expression -> expression MINUS . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . 
LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 6 - - expression -> expression PLUS . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 7 - - expression -> expression DIVIDE . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 8 - - expression -> LPAREN expression . RPAREN - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - RPAREN shift and go to state 13 - PLUS shift and go to state 6 - MINUS shift and go to state 5 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - -state 9 - - expression -> expression TIMES expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - $ reduce using rule 3 - PLUS reduce using rule 3 - MINUS reduce using rule 3 - TIMES reduce using rule 3 - DIVIDE reduce using rule 3 - RPAREN reduce using rule 3 - - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] - ! TIMES [ shift and go to state 4 ] - ! DIVIDE [ shift and go to state 7 ] - -state 10 - - expression -> expression MINUS expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - $ reduce using rule 2 - PLUS reduce using rule 2 - MINUS reduce using rule 2 - RPAREN reduce using rule 2 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - ! TIMES [ reduce using rule 2 ] - ! DIVIDE [ reduce using rule 2 ] - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] - -state 11 - - expression -> expression PLUS expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - $ reduce using rule 1 - PLUS reduce using rule 1 - MINUS reduce using rule 1 - RPAREN reduce using rule 1 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - ! TIMES [ reduce using rule 1 ] - ! DIVIDE [ reduce using rule 1 ] - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] - -state 12 - - expression -> expression DIVIDE expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - $ reduce using rule 4 - PLUS reduce using rule 4 - MINUS reduce using rule 4 - TIMES reduce using rule 4 - DIVIDE reduce using rule 4 - RPAREN reduce using rule 4 - - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] - ! TIMES [ shift and go to state 4 ] - ! 
DIVIDE [ shift and go to state 7 ] - -state 13 - - expression -> LPAREN expression RPAREN . - - $ reduce using rule 6 - PLUS reduce using rule 6 - MINUS reduce using rule 6 - TIMES reduce using rule 6 - DIVIDE reduce using rule 6 - RPAREN reduce using rule 6 ---- -By looking at these rules (and with a little practice), you can usually track down the source -of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are -bad. However, the only way to be sure that they are resolved correctly is to look at parser.out. - -- ! TIMES [ reduce using rule 2 ] - ! DIVIDE [ reduce using rule 2 ] - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] --6.8 Syntax Error Handling
-
-If you are creating a parser for production use, the handling of
-syntax errors is important. As a general rule, you don't want a
-parser to simply throw up its hands and stop at the first sign of
-trouble. Instead, you want it to report the error, recover if possible, and
-continue parsing so that all of the errors in the input get reported
-to the user at once. This is the standard behavior found in compilers
-for languages such as C, C++, and Java.
-
-In PLY, when a syntax error occurs during parsing, the error is immediately
-detected (i.e., the parser does not read any more tokens beyond the
-source of the error). However, at this point, the parser enters a
-recovery mode that can be used to try and continue further parsing.
-As a general rule, error recovery in LR parsers is a delicate
-topic that involves ancient rituals and black-magic. The recovery mechanism
-provided by yacc.py is comparable to Unix yacc, so you may want to
-consult a book like O'Reilly's "Lex and Yacc" for some of the finer details.
-
-When a syntax error occurs, yacc.py performs the following steps
-(a minimal p_error() sketch appears after this list):
-
-
- -- On the first occurrence of an error, the user-defined p_error() function -is called with the offending token as an argument. However, if the syntax error is due to -reaching the end-of-file, p_error() is called with an argument of None. -Afterwards, the parser enters -an "error-recovery" mode in which it will not make future calls to p_error() until it -has successfully shifted at least 3 tokens onto the parsing stack. - -
-
- If no recovery action is taken in p_error(), the offending lookahead token is replaced -with a special error token. - -
-
- If the offending lookahead token is already set to error, the top item of the parsing stack is -deleted. - -
-
- If the entire parsing stack is unwound, the parser enters a restart state and attempts to start -parsing from its initial state. - -
-
- If a grammar rule accepts error as a token, it will be -shifted onto the parsing stack. - -
-
- If the top item of the parsing stack is error, lookahead tokens will be discarded until the -parser can successfully shift a new symbol or reduce a rule involving error. -
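Putting the first of these steps into code, a minimal p_error() handler might do nothing more than report the offending token, remembering that it receives None when the error is at end-of-file. This is only a sketch; the sections that follow describe more complete recovery strategies:

def p_error(p):
    if p:
        print "Syntax error at token", p.type, "with value", repr(p.value)
    else:
        print "Syntax error at end of input"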
6.8.1 Recovery and resynchronization with error rules
- - -The most well-behaved approach for handling syntax errors is to write grammar rules that include the error -token. For example, suppose your language had a grammar rule for a print statement like this: - --- -To account for the possibility of a bad expression, you might write an additional grammar rule like this: - --def p_statement_print(p): - 'statement : PRINT expr SEMI' - ... ---- -In this case, the error token will match any sequence of -tokens that might appear up to the first semicolon that is -encountered. Once the semicolon is reached, the rule will be -invoked and the error token will go away. - --def p_statement_print_error(p): - 'statement : PRINT error SEMI' - print "Syntax error in print statement. Bad expression" - ---This type of recovery is sometimes known as parser resynchronization. -The error token acts as a wildcard for any bad input text and -the token immediately following error acts as a -synchronization token. - -
-It is important to note that the error token usually does not appear as the last token -on the right in an error rule. For example: - -
-- -This is because the first bad token encountered will cause the rule to -be reduced--which may make it difficult to recover if more bad tokens -immediately follow. - --def p_statement_print_error(p): - 'statement : PRINT error' - print "Syntax error in print statement. Bad expression" --6.8.2 Panic mode recovery
- - -An alternative error recovery scheme is to enter a panic mode recovery in which tokens are -discarded to a point where the parser might be able to recover in some sensible manner. - --Panic mode recovery is implemented entirely in the p_error() function. For example, this -function starts discarding tokens until it reaches a closing '}'. Then, it restarts the -parser in its initial state. - -
-- --def p_error(p): - print "Whoa. You are seriously hosed." - # Read ahead looking for a closing '}' - while 1: - tok = yacc.token() # Get the next token - if not tok or tok.type == 'RBRACE': break - yacc.restart() ---This function simply discards the bad token and tells the parser that the error was ok. - -
-- --def p_error(p): - print "Syntax error at token", p.type - # Just discard the token and tell the parser it's okay. - yacc.errok() ---Within the p_error() function, three functions are available to control the behavior -of the parser: -
-
-
- -Note: these functions are only available when invoking p_error() and are not available -at any other time. - -- yacc.errok(). This resets the parser state so it doesn't think it's in error-recovery -mode. This will prevent an error token from being generated and will reset the internal -error counters so that the next syntax error will call p_error() again. - -
-
- yacc.token(). This returns the next token on the input stream. - -
-
- yacc.restart(). This discards the entire parsing stack and resets the parser -to its initial state. -
-To supply the next lookahead token to the parser, p_error() can return a token. This might be -useful if trying to synchronize on special characters. For example: - -
-- --def p_error(p): - # Read ahead looking for a terminating ";" - while 1: - tok = yacc.token() # Get the next token - if not tok or tok.type == 'SEMI': break - yacc.errok() - - # Return SEMI to the parser as the next lookahead token - return tok --6.8.3 Signaling an error from a production
- - -If necessary, a production rule can manually force the parser to enter error recovery. This -is done by raising the SyntaxError exception like this: - --- -The effect of raising SyntaxError is the same as if the last symbol shifted onto the -parsing stack was actually a syntax error. Thus, when you do this, the last symbol shifted is popped off -of the parsing stack and the current lookahead token is set to an error token. The parser -then enters error-recovery mode where it tries to reduce rules that can accept error tokens. -The steps that follow from this point are exactly the same as if a syntax error were detected and -p_error() were called. - --def p_production(p): - 'production : some production ...' - raise SyntaxError ---One important aspect of manually setting an error is that the p_error() function will NOT be -called in this case. If you need to issue an error message, make sure you do it in the production that -raises SyntaxError. - -
-Note: This feature of PLY is meant to mimic the behavior of the YYERROR macro in yacc. - - -
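For example, a production that performs its own semantic check might report the problem itself before raising SyntaxError. The helper is_known_function() below is purely hypothetical; only the overall shape of the rule matters:

def p_funccall(p):
    'funccall : ID LPAREN arguments RPAREN'
    if not is_known_function(p[1]):          # hypothetical semantic check
        print "Unknown function '%s' at line %d" % (p[1], p.lineno(1))
        raise SyntaxError                    # note: p_error() is not called
    p[0] = ('call', p[1], p[3])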
6.8.4 General comments on error handling
- - -For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable -technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy -to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want -to discard huge portions of the input text to find a valid restart point. - -6.9 Line Number and Position Tracking
- - -Position tracking is often a tricky problem when writing compilers. -By default, PLY tracks the line number and position of all tokens. -This information is available using the following functions: - --
- -For example: - -- p.lineno(num). Return the line number for symbol num -
- p.lexpos(num). Return the lexing position for symbol num -
-- -As an optional feature, yacc.py can automatically track line -numbers and positions for all of the grammar symbols as well. -However, this extra tracking requires extra processing and can -significantly slow down parsing. Therefore, it must be enabled by -passing the -tracking=True option to yacc.parse(). For example: - --def p_expression(p): - 'expression : expression PLUS expression' - line = p.lineno(2) # line number of the PLUS token - index = p.lexpos(2) # Position of the PLUS token ---- -Once enabled, the lineno() and lexpos() methods work -for all grammar symbols. In addition, two additional methods can be -used: - --yacc.parse(data,tracking=True) ---
- -For example: - -- p.linespan(num). Return a tuple (startline,endline) with the starting and ending line number for symbol num. -
- p.lexspan(num). Return a tuple (start,end) with the starting and ending positions for symbol num. -
-- -Note: The lexspan() function only returns the range of values up to the start of the last grammar symbol. - --def p_expression(p): - 'expression : expression PLUS expression' - p.lineno(1) # Line number of the left expression - p.lineno(2) # line number of the PLUS operator - p.lineno(3) # line number of the right expression - ... - start,end = p.linespan(3) # Start,end lines of the right expression - starti,endi = p.lexspan(3) # Start,end positions of right expression - ---Although it may be convenient for PLY to track position information on -all grammar symbols, this is often unnecessary. For example, if you -are merely using line number information in an error message, you can -often just key off of a specific token in the grammar rule. For -example: - -
-- --def p_bad_func(p): - 'funccall : fname LPAREN error RPAREN' - # Line number reported from LPAREN token - print "Bad function call at line", p.lineno(2) ---Similarly, you may get better parsing performance if you only -selectively propagate line number information where it's needed using -the p.set_lineno() method. For example: - -
-- -PLY doesn't retain line number information from rules that have already been -parsed. If you are building an abstract syntax tree and need to have line numbers, -you should make sure that the line numbers appear in the tree itself. - --def p_fname(p): - 'fname : ID' - p[0] = p[1] - p.set_lineno(0,p.lineno(1)) --6.10 AST Construction
- - -yacc.py provides no special functions for constructing an -abstract syntax tree. However, such construction is easy enough to do -on your own. - -A minimal way to construct a tree is to simply create and -propagate a tuple or list in each grammar rule function. There -are many possible ways to do this, but one example would be something -like this: - -
-
-def p_expression_binop(p):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-
-    p[0] = ('binary-expression',p[2],p[1],p[3])
-
-def p_expression_group(p):
-    'expression : LPAREN expression RPAREN'
-    p[0] = ('group-expression',p[2])
-
-def p_expression_number(p):
-    'expression : NUMBER'
-    p[0] = ('number-expression',p[1])
-
-
-Another approach is to create a set of data structures for different
-kinds of abstract syntax tree nodes and assign nodes to p[0]
-in each rule. For example:
-
-- -The advantage to this approach is that it may make it easier to attach more complicated -semantics, type checking, code generation, and other features to the node classes. - --class Expr: pass - -class BinOp(Expr): - def __init__(self,left,op,right): - self.type = "binop" - self.left = left - self.right = right - self.op = op - -class Number(Expr): - def __init__(self,value): - self.type = "number" - self.value = value - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - - p[0] = BinOp(p[1],p[2],p[3]) - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = Number(p[1]) ---To simplify tree traversal, it may make sense to pick a very generic -tree structure for your parse tree nodes. For example: - -
-- --class Node: - def __init__(self,type,children=None,leaf=None): - self.type = type - if children: - self.children = children - else: - self.children = [ ] - self.leaf = leaf - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - - p[0] = Node("binop", [p[1],p[3]], p[2]) --6.11 Embedded Actions
- - -The parsing technique used by yacc only allows actions to be executed at the end of a rule. For example, -suppose you have a rule like this: - --- --def p_foo(p): - "foo : A B C D" - print "Parsed a foo", p[1],p[2],p[3],p[4] ---In this case, the supplied action code only executes after all of the -symbols A, B, C, and D have been -parsed. Sometimes, however, it is useful to execute small code -fragments during intermediate stages of parsing. For example, suppose -you wanted to perform some action immediately after A has -been parsed. To do this, write an empty rule like this: - -
-- --def p_foo(p): - "foo : A seen_A B C D" - print "Parsed a foo", p[1],p[3],p[4],p[5] - print "seen_A returned", p[2] - -def p_seen_A(p): - "seen_A :" - print "Saw an A = ", p[-1] # Access grammar symbol to left - p[0] = some_value # Assign value to seen_A - ---In this example, the empty seen_A rule executes immediately -after A is shifted onto the parsing stack. Within this -rule, p[-1] refers to the symbol on the stack that appears -immediately to the left of the seen_A symbol. In this case, -it would be the value of A in the foo rule -immediately above. Like other rules, a value can be returned from an -embedded action by simply assigning it to p[0] - -
-The use of embedded actions can sometimes introduce extra shift/reduce conflicts. For example, -this grammar has no conflicts: - -
-- -However, if you insert an embedded action into one of the rules like this, - --def p_foo(p): - """foo : abcd - | abcx""" - -def p_abcd(p): - "abcd : A B C D" - -def p_abcx(p): - "abcx : A B C X" ---- -an extra shift-reduce conflict will be introduced. This conflict is -caused by the fact that the same symbol C appears next in -both the abcd and abcx rules. The parser can either -shift the symbol (abcd rule) or reduce the empty -rule seen_AB (abcx rule). - --def p_foo(p): - """foo : abcd - | abcx""" - -def p_abcd(p): - "abcd : A B C D" - -def p_abcx(p): - "abcx : A B seen_AB C X" - -def p_seen_AB(p): - "seen_AB :" ---A common use of embedded rules is to control other aspects of parsing -such as scoping of local variables. For example, if you were parsing C code, you might -write code like this: - -
-- -In this case, the embedded action new_scope executes -immediately after a LBRACE ({) symbol is parsed. -This might adjust internal symbol tables and other aspects of the -parser. Upon completion of the rule statements_block, code -might undo the operations performed in the embedded action -(e.g., pop_scope()). - --def p_statements_block(p): - "statements: LBRACE new_scope statements RBRACE""" - # Action code - ... - pop_scope() # Return to previous scope - -def p_new_scope(p): - "new_scope :" - # Create a new scope for local variables - s = new_scope() - push_scope(s) - ... --6.12 Miscellaneous Yacc Notes
- - --
- -- The default parsing method is LALR. To use SLR instead, run yacc() as follows: - -
--Note: LALR table generation takes approximately twice as long as SLR table generation. There is no -difference in actual parsing performance---the same code is used in both cases. LALR is preferred when working -with more complicated grammars since it is more powerful. - --yacc.yacc(method="SLR") --- -
- By default, yacc.py relies on lex.py for tokenizing. However, an alternative tokenizer -can be supplied as follows: - -
-
-In this case, x must be a Lexer object that minimally has an x.token() method for retrieving the next
-token. If an input string is given to yacc.parse(), the lexer must also have an x.input() method.
-
-yacc.parse(lexer=x)
-
- By default, the yacc generates tables in debugging mode (which produces the parser.out file and other output). -To disable this, use - -
-- --yacc.yacc(debug=0) ---
- To change the name of the parsetab.py file, use: - -
-- --yacc.yacc(tabmodule="foo") ---
- To change the directory in which the parsetab.py file (and other output files) are written, use: -
-- --yacc.yacc(tabmodule="foo",outputdir="somedirectory") ---
- To prevent yacc from generating any kind of parser table file, use: -
-
-
-Note: If you disable table generation, yacc() will regenerate the parsing tables
-each time it runs (which may take a while depending on how large your grammar is).
-
-yacc.yacc(write_tables=0)
-
- To print copious amounts of debugging during parsing, use: - -
-- --yacc.parse(debug=1) ---
- The yacc.yacc() function really returns a parser object. If you want to support multiple -parsers in the same application, do this: - -
-- -Note: The function yacc.parse() is bound to the last parser that was generated. - --p = yacc.yacc() -... -p.parse() ---
- Since the generation of the LALR tables is relatively expensive, previously generated tables are -cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5 -checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated. - -
-It should be noted that table generation is reasonably efficient, even for grammars that involve around 100 rules
-and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow
-machine. Please be patient.
-
-
- Since LR parsing is driven by tables, the performance of the parser is largely independent of the -size of the grammar. The biggest bottlenecks will be the lexer and the complexity of the code in your grammar rules. -
7. Multiple Parsers and Lexers
-
-
-In advanced parsing applications, you may want to have multiple
-parsers and lexers.
-
-As a general rule, this isn't a problem. However, to make it work,
-you need to carefully make sure everything gets hooked up correctly.
-First, make sure you save the objects returned by lex() and
-yacc(). For example:
-
-- -Next, when parsing, make sure you give the parse() function a reference to the lexer it -should be using. For example: - --lexer = lex.lex() # Return lexer object -parser = yacc.yacc() # Return parser object ---- -If you forget to do this, the parser will use the last lexer -created--which is not always what you want. - --parser.parse(text,lexer=lexer) ---Within lexer and parser rule functions, these objects are also -available. In the lexer, the "lexer" attribute of a token refers to -the lexer object that triggered the rule. For example: - -
-- -In the parser, the "lexer" and "parser" attributes refer to the lexer -and parser objects respectively. - --def t_NUMBER(t): - r'\d+' - ... - print t.lexer # Show lexer object ---- -If necessary, arbitrary attributes can be attached to the lexer or parser object. -For example, if you wanted to have different parsing modes, you could attach a mode -attribute to the parser object and look at it later. - --def p_expr_plus(p): - 'expr : expr PLUS expr' - ... - print p.parser # Show parser object - print p.lexer # Show lexer object --8. Using Python's Optimized Mode
- - -Because PLY uses information from doc-strings, parsing and lexing -information must be gathered while running the Python interpreter in -normal mode (i.e., not with the -O or -OO options). However, if you -specify optimized mode like this: - --- -then PLY can later be used when Python runs in optimized mode. To make this work, -make sure you first run Python in normal mode. Once the lexing and parsing tables -have been generated the first time, run Python in optimized mode. PLY will use -the tables without the need for doc strings. - --lex.lex(optimize=1) -yacc.yacc(optimize=1) ---Beware: running PLY in optimized mode disables a lot of error -checking. You should only do this when your project has stabilized -and you don't need to do any debugging. One of the purposes of -optimized mode is to substantially decrease the startup time of -your compiler (by assuming that everything is already properly -specified and works). - -
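If the generated table modules need predictable names (for example, so they can be shipped alongside an application), the module names can be given explicitly. The names below are placeholders only:

lex.lex(optimize=1, lextab="foo_lextab")
yacc.yacc(optimize=1, tabmodule="foo_parsetab")

After one run in normal mode has written these table files, the same program can be started with python -O and PLY will use the cached tables without needing the doc strings.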
9. Advanced Debugging
-
-
-Debugging a compiler is typically not an easy task. PLY provides some
-advanced diagnostic capabilities through the use of Python's
-logging module. The next two sections describe this:
-
9.1 Debugging the lex() and yacc() commands
- - --Both the lex() and yacc() commands have a debugging -mode that can be enabled using the debug flag. For example: - -
-- -Normally, the output produced by debugging is routed to either -standard error or, in the case of yacc(), to a file -parser.out. This output can be more carefully controlled -by supplying a logging object. Here is an example that adds -information about where different debugging messages are coming from: - --lex.lex(debug=True) -yacc.yacc(debug=True) ---- -If you supply a custom logger, the amount of debugging -information produced can be controlled by setting the logging level. -Typically, debugging messages are either issued at the DEBUG, -INFO, or WARNING levels. - --# Set up a logging object -import logging -logging.basicConfig( - level = logging.DEBUG, - filename = "parselog.txt", - filemode = "w", - format = "%(filename)10s:%(lineno)4d:%(message)s" -) -log = logging.getLogger() - -lex.lex(debug=True,debuglog=log) -yacc.yacc(debug=True,debuglog=log) ---PLY's error messages and warnings are also produced using the logging -interface. This can be controlled by passing a logging object -using the errorlog parameter. - -
-- -If you want to completely silence warnings, you can either pass in a -logging object with an appropriate filter level or use the NullLogger -object defined in either lex or yacc. For example: - --lex.lex(errorlog=log) -yacc.yacc(errorlog=log) ---- --yacc.yacc(errorlog=yacc.NullLogger()) --9.2 Run-time Debugging
- - --To enable run-time debugging of a parser, use the debug option to parse. This -option can either be an integer (which simply turns debugging on or off) or an instance -of a logger object. For example: - -
-- -If a logging object is passed, you can use its filtering level to control how much -output gets generated. The INFO level is used to produce information -about rule reductions. The DEBUG level will show information about the -parsing stack, token shifts, and other details. The ERROR level shows information -related to parsing errors. - --log = logging.getLogger() -parser.parse(input,debug=log) ---For very complicated problems, you should pass in a logging object that -redirects to a file where you can more easily inspect the output after -execution. - -
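For instance, reusing the logging setup shown in the previous section, a file-based logger can be handed directly to parse(); the file name here is only an example:

import logging
logging.basicConfig(
    level    = logging.DEBUG,
    filename = "parsedebug.txt",
    filemode = "w"
)
log = logging.getLogger()

result = parser.parse(data, debug=log)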
10. Where to go from here?
- - -The examples directory of the PLY distribution contains several simple examples. Please consult a -compilers textbook for the theory and underlying implementation details or LR parsing. - - - - - - - - - - diff --git a/ply/example/BASIC/README b/ply/example/BASIC/README deleted file mode 100644 index be24a30..0000000 --- a/ply/example/BASIC/README +++ /dev/null @@ -1,79 +0,0 @@ -Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by -David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html), -I thought that a fully working BASIC interpreter might be an interesting, -if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea, -but in any case, here it is. - -In this example, you'll find a rough implementation of 1964 Dartmouth BASIC -as described in the manual at: - - http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf - -See also: - - http://en.wikipedia.org/wiki/Dartmouth_BASIC - -This dialect is downright primitive---there are no string variables -and no facilities for interactive input. Moreover, subroutines and functions -are brain-dead even more than they usually are for BASIC. Of course, -the GOTO statement is provided. - -Nevertheless, there are a few interesting aspects of this example: - - - It illustrates a fully working interpreter including lexing, parsing, - and interpretation of instructions. - - - The parser shows how to catch and report various kinds of parsing - errors in a more graceful way. - - - The example both parses files (supplied on command line) and - interactive input entered line by line. - - - It shows how you might represent parsed information. In this case, - each BASIC statement is encoded into a Python tuple containing the - statement type and parameters. These tuples are then stored in - a dictionary indexed by program line numbers. - - - Even though it's just BASIC, the parser contains more than 80 - rules and 150 parsing states. Thus, it's a little more meaty than - the calculator example. - -To use the example, run it as follows: - - % python basic.py hello.bas - HELLO WORLD - % - -or use it interactively: - - % python basic.py - [BASIC] 10 PRINT "HELLO WORLD" - [BASIC] 20 END - [BASIC] RUN - HELLO WORLD - [BASIC] - -The following files are defined: - - basic.py - High level script that controls everything - basiclex.py - BASIC tokenizer - basparse.py - BASIC parser - basinterp.py - BASIC interpreter that runs parsed programs. - -In addition, a number of sample BASIC programs (.bas suffix) are -provided. These were taken out of the Dartmouth manual. - -Disclaimer: I haven't spent a ton of time testing this and it's likely that -I've skimped here and there on a few finer details (e.g., strictly enforcing -variable naming rules). However, the interpreter seems to be able to run -the examples in the BASIC manual. - -Have fun! - --Dave - - - - - - diff --git a/ply/example/BASIC/basic.py b/ply/example/BASIC/basic.py deleted file mode 100644 index b14483d..0000000 --- a/ply/example/BASIC/basic.py +++ /dev/null @@ -1,71 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import basiclex -import basparse -import basinterp - -# If a filename has been specified, we try to run it. 
-# If a runtime error occurs, we bail out and enter -# interactive mode below -if len(sys.argv) == 2: - data = open(sys.argv[1]).read() - prog = basparse.parse(data) - if not prog: raise SystemExit - b = basinterp.BasicInterpreter(prog) - try: - b.run() - raise SystemExit - except RuntimeError: - pass - -else: - b = basinterp.BasicInterpreter({}) - -# Interactive mode. This incrementally adds/deletes statements -# from the program stored in the BasicInterpreter object. In -# addition, special commands 'NEW','LIST',and 'RUN' are added. -# Specifying a line number with no code deletes that line from -# the program. - -while 1: - try: - line = raw_input("[BASIC] ") - except EOFError: - raise SystemExit - if not line: continue - line += "\n" - prog = basparse.parse(line) - if not prog: continue - - keys = list(prog) - if keys[0] > 0: - b.add_statements(prog) - else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - diff --git a/ply/example/BASIC/basiclex.py b/ply/example/BASIC/basiclex.py deleted file mode 100644 index 3d27cde..0000000 --- a/ply/example/BASIC/basiclex.py +++ /dev/null @@ -1,74 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) - -from ply import * - -keywords = ( - 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP', - 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW', -) - -tokens = keywords + ( - 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER', - 'LPAREN','RPAREN','LT','LE','GT','GE','NE', - 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING', - 'ID','NEWLINE' -) - -t_ignore = ' \t' - -def t_REM(t): - r'REM .*' - return t - -def t_ID(t): - r'[A-Z][A-Z0-9]*' - if t.value in keywords: - t.type = t.value - return t - -t_EQUALS = r'=' -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_POWER = r'\^' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LT = r'<' -t_LE = r'<=' -t_GT = r'>' -t_GE = r'>=' -t_NE = r'<>' -t_COMMA = r'\,' -t_SEMI = r';' -t_INTEGER = r'\d+' -t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' -t_STRING = r'\".*?\"' - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - return t - -def t_error(t): - print("Illegal character %s" % t.value[0]) - t.lexer.skip(1) - -lex.lex(debug=0) - - - - - - - - - - - - - - - - - diff --git a/ply/example/BASIC/basiclog.py b/ply/example/BASIC/basiclog.py deleted file mode 100644 index ccfd7b9..0000000 --- a/ply/example/BASIC/basiclog.py +++ /dev/null @@ -1,79 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import logging -logging.basicConfig( - level = logging.INFO, - filename = "parselog.txt", - filemode = "w" -) -log = logging.getLogger() - -import basiclex -import basparse -import basinterp - -# If a filename has been specified, we try to run it. -# If a runtime error occurs, we bail out and enter -# interactive mode below -if len(sys.argv) == 2: - data = open(sys.argv[1]).read() - prog = basparse.parse(data,debug=log) - if not prog: raise SystemExit - b = basinterp.BasicInterpreter(prog) - try: - b.run() - raise SystemExit - except RuntimeError: - pass - -else: - b = basinterp.BasicInterpreter({}) - -# Interactive mode. This incrementally adds/deletes statements -# from the program stored in the BasicInterpreter object. In -# addition, special commands 'NEW','LIST',and 'RUN' are added. 
-# Specifying a line number with no code deletes that line from -# the program. - -while 1: - try: - line = raw_input("[BASIC] ") - except EOFError: - raise SystemExit - if not line: continue - line += "\n" - prog = basparse.parse(line,debug=log) - if not prog: continue - - keys = list(prog) - if keys[0] > 0: - b.add_statements(prog) - else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - diff --git a/ply/example/BASIC/basinterp.py b/ply/example/BASIC/basinterp.py deleted file mode 100644 index 3e8a777..0000000 --- a/ply/example/BASIC/basinterp.py +++ /dev/null @@ -1,441 +0,0 @@ -# This file provides the runtime support for running a basic program -# Assumes the program has been parsed using basparse.py - -import sys -import math -import random - -class BasicInterpreter: - - # Initialize the interpreter. prog is a dictionary - # containing (line,statement) mappings - def __init__(self,prog): - self.prog = prog - - self.functions = { # Built-in function table - 'SIN' : lambda z: math.sin(self.eval(z)), - 'COS' : lambda z: math.cos(self.eval(z)), - 'TAN' : lambda z: math.tan(self.eval(z)), - 'ATN' : lambda z: math.atan(self.eval(z)), - 'EXP' : lambda z: math.exp(self.eval(z)), - 'ABS' : lambda z: abs(self.eval(z)), - 'LOG' : lambda z: math.log(self.eval(z)), - 'SQR' : lambda z: math.sqrt(self.eval(z)), - 'INT' : lambda z: int(self.eval(z)), - 'RND' : lambda z: random.random() - } - - # Collect all data statements - def collect_data(self): - self.data = [] - for lineno in self.stat: - if self.prog[lineno][0] == 'DATA': - self.data = self.data + self.prog[lineno][1] - self.dc = 0 # Initialize the data counter - - # Check for end statements - def check_end(self): - has_end = 0 - for lineno in self.stat: - if self.prog[lineno][0] == 'END' and not has_end: - has_end = lineno - if not has_end: - print("NO END INSTRUCTION") - self.error = 1 - return - if has_end != lineno: - print("END IS NOT LAST") - self.error = 1 - - # Check loops - def check_loops(self): - for pc in range(len(self.stat)): - lineno = self.stat[pc] - if self.prog[lineno][0] == 'FOR': - forinst = self.prog[lineno] - loopvar = forinst[1] - for i in range(pc+1,len(self.stat)): - if self.prog[self.stat[i]][0] == 'NEXT': - nextvar = self.prog[self.stat[i]][1] - if nextvar != loopvar: continue - self.loopend[pc] = i - break - else: - print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc]) - self.error = 1 - - # Evaluate an expression - def eval(self,expr): - etype = expr[0] - if etype == 'NUM': return expr[1] - elif etype == 'GROUP': return self.eval(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return -self.eval(expr[2]) - elif etype == 'BINOP': - if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3]) - elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3]) - elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3]) - elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3]) - elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3]) - elif etype == 'VAR': - var,dim1,dim2 = expr[1] - if not dim1 and not dim2: - if var in self.vars: - return self.vars[var] - else: - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError - # May be a list lookup or a function evaluation - if dim1 and not dim2: - if var in self.functions: - # A function - return 
self.functions[var](dim1) - else: - # A list evaluation - if var in self.lists: - dim1val = self.eval(dim1) - if dim1val < 1 or dim1val > len(self.lists[var]): - print("LIST INDEX OUT OF BOUNDS AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.lists[var][dim1val-1] - if dim1 and dim2: - if var in self.tables: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): - print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.tables[var][dim1val-1][dim2val-1] - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError - - # Evaluate a relational expression - def releval(self,expr): - etype = expr[1] - lhs = self.eval(expr[2]) - rhs = self.eval(expr[3]) - if etype == '<': - if lhs < rhs: return 1 - else: return 0 - - elif etype == '<=': - if lhs <= rhs: return 1 - else: return 0 - - elif etype == '>': - if lhs > rhs: return 1 - else: return 0 - - elif etype == '>=': - if lhs >= rhs: return 1 - else: return 0 - - elif etype == '=': - if lhs == rhs: return 1 - else: return 0 - - elif etype == '<>': - if lhs != rhs: return 1 - else: return 0 - - # Assignment - def assign(self,target,value): - var, dim1, dim2 = target - if not dim1 and not dim2: - self.vars[var] = self.eval(value) - elif dim1 and not dim2: - # List assignment - dim1val = self.eval(dim1) - if not var in self.lists: - self.lists[var] = [0]*10 - - if dim1val > len(self.lists[var]): - print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.lists[var][dim1val-1] = self.eval(value) - elif dim1 and dim2: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if not var in self.tables: - temp = [0]*10 - v = [] - for i in range(10): v.append(temp[:]) - self.tables[var] = v - # Variable already exists - if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): - print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.tables[var][dim1val-1][dim2val-1] = self.eval(value) - - # Change the current line number - def goto(self,linenum): - if not linenum in self.prog: - print("UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc])) - raise RuntimeError - self.pc = self.stat.index(linenum) - - # Run it - def run(self): - self.vars = { } # All variables - self.lists = { } # List variables - self.tables = { } # Tables - self.loops = [ ] # Currently active loops - self.loopend= { } # Mapping saying where loops end - self.gosub = None # Gosub return point (if any) - self.error = 0 # Indicates program error - - self.stat = list(self.prog) # Ordered list of all line numbers - self.stat.sort() - self.pc = 0 # Current program counter - - # Processing prior to running - - self.collect_data() # Collect all of the data statements - self.check_end() - self.check_loops() - - if self.error: raise RuntimeError - - while 1: - line = self.stat[self.pc] - instr = self.prog[line] - - op = instr[0] - - # END and STOP statements - if op == 'END' or op == 'STOP': - break # We're done - - # GOTO statement - elif op == 'GOTO': - newline = instr[1] - self.goto(newline) - continue - - # PRINT statement - elif op == 'PRINT': - plist = instr[1] - out = "" - for label,val in plist: - if out: - out += ' '*(15 - (len(out) % 15)) - out += label - if val: - if label: out += " " - eval = self.eval(val) - out += str(eval) - sys.stdout.write(out) - end = instr[2] - if not (end == ',' or 
end == ';'): - sys.stdout.write("\n") - if end == ',': sys.stdout.write(" "*(15-(len(out) % 15))) - if end == ';': sys.stdout.write(" "*(3-(len(out) % 3))) - - # LET statement - elif op == 'LET': - target = instr[1] - value = instr[2] - self.assign(target,value) - - # READ statement - elif op == 'READ': - for target in instr[1]: - if self.dc < len(self.data): - value = ('NUM',self.data[self.dc]) - self.assign(target,value) - self.dc += 1 - else: - # No more data. Program ends - return - elif op == 'IF': - relop = instr[1] - newline = instr[2] - if (self.releval(relop)): - self.goto(newline) - continue - - elif op == 'FOR': - loopvar = instr[1] - initval = instr[2] - finval = instr[3] - stepval = instr[4] - - # Check to see if this is a new loop - if not self.loops or self.loops[-1][0] != self.pc: - # Looks like a new loop. Make the initial assignment - newvalue = initval - self.assign((loopvar,None,None),initval) - if not stepval: stepval = ('NUM',1) - stepval = self.eval(stepval) # Evaluate step here - self.loops.append((self.pc,stepval)) - else: - # It's a repeat of the previous loop - # Update the value of the loop variable according to the step - stepval = ('NUM',self.loops[-1][1]) - newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval) - - if self.loops[-1][1] < 0: relop = '>=' - else: relop = '<=' - if not self.releval(('RELOP',relop,newvalue,finval)): - # Loop is done. Jump to the NEXT - self.pc = self.loopend[self.pc] - self.loops.pop() - else: - self.assign((loopvar,None,None),newvalue) - - elif op == 'NEXT': - if not self.loops: - print("NEXT WITHOUT FOR AT LINE %s" % line) - return - - nextvar = instr[1] - self.pc = self.loops[-1][0] - loopinst = self.prog[self.stat[self.pc]] - forvar = loopinst[1] - if nextvar != forvar: - print("NEXT DOESN'T MATCH FOR AT LINE %s" % line) - return - continue - elif op == 'GOSUB': - newline = instr[1] - if self.gosub: - print("ALREADY IN A SUBROUTINE AT LINE %s" % line) - return - self.gosub = self.stat[self.pc] - self.goto(newline) - continue - - elif op == 'RETURN': - if not self.gosub: - print("RETURN WITHOUT A GOSUB AT LINE %s" % line) - return - self.goto(self.gosub) - self.gosub = None - - elif op == 'FUNC': - fname = instr[1] - pname = instr[2] - expr = instr[3] - def eval_func(pvalue,name=pname,self=self,expr=expr): - self.assign((pname,None,None),pvalue) - return self.eval(expr) - self.functions[fname] = eval_func - - elif op == 'DIM': - for vname,x,y in instr[1]: - if y == 0: - # Single dimension variable - self.lists[vname] = [0]*x - else: - # Double dimension variable - temp = [0]*y - v = [] - for i in range(x): - v.append(temp[:]) - self.tables[vname] = v - - self.pc += 1 - - # Utility functions for program listing - def expr_str(self,expr): - etype = expr[0] - if etype == 'NUM': return str(expr[1]) - elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return "-"+str(expr[2]) - elif etype == 'BINOP': - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - elif etype == 'VAR': - return self.var_str(expr[1]) - - def relexpr_str(self,expr): - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - - def var_str(self,var): - varname,dim1,dim2 = var - if not dim1 and not dim2: return varname - if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1)) - return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2)) - - # Create a program listing - def list(self): - stat = list(self.prog) # Ordered list of all 
line numbers - stat.sort() - for line in stat: - instr = self.prog[line] - op = instr[0] - if op in ['END','STOP','RETURN']: - print("%s %s" % (line, op)) - continue - elif op == 'REM': - print("%s %s" % (line, instr[1])) - elif op == 'PRINT': - _out = "%s %s " % (line, op) - first = 1 - for p in instr[1]: - if not first: _out += ", " - if p[0] and p[1]: _out += '"%s"%s' % (p[0],self.expr_str(p[1])) - elif p[1]: _out += self.expr_str(p[1]) - else: _out += '"%s"' % (p[0],) - first = 0 - if instr[2]: _out += instr[2] - print(_out) - elif op == 'LET': - print("%s LET %s = %s" % (line,self.var_str(instr[1]),self.expr_str(instr[2]))) - elif op == 'READ': - _out = "%s READ " % line - first = 1 - for r in instr[1]: - if not first: _out += "," - _out += self.var_str(r) - first = 0 - print(_out) - elif op == 'IF': - print("%s IF %s THEN %d" % (line,self.relexpr_str(instr[1]),instr[2])) - elif op == 'GOTO' or op == 'GOSUB': - print("%s %s %s" % (line, op, instr[1])) - elif op == 'FOR': - _out = "%s FOR %s = %s TO %s" % (line,instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])) - if instr[4]: _out += " STEP %s" % (self.expr_str(instr[4])) - print(_out) - elif op == 'NEXT': - print("%s NEXT %s" % (line, instr[1])) - elif op == 'FUNC': - print("%s DEF %s(%s) = %s" % (line,instr[1],instr[2],self.expr_str(instr[3]))) - elif op == 'DIM': - _out = "%s DIM " % line - first = 1 - for vname,x,y in instr[1]: - if not first: _out += "," - first = 0 - if y == 0: - _out += "%s(%d)" % (vname,x) - else: - _out += "%s(%d,%d)" % (vname,x,y) - - print(_out) - elif op == 'DATA': - _out = "%s DATA " % line - first = 1 - for v in instr[1]: - if not first: _out += "," - first = 0 - _out += v - print(_out) - - # Erase the current program - def new(self): - self.prog = {} - - # Insert statements - def add_statements(self,prog): - for line,stat in prog.items(): - self.prog[line] = stat - - # Delete a statement - def del_line(self,lineno): - try: - del self.prog[lineno] - except KeyError: - pass - diff --git a/ply/example/BASIC/basparse.py b/ply/example/BASIC/basparse.py deleted file mode 100644 index ccdeb16..0000000 --- a/ply/example/BASIC/basparse.py +++ /dev/null @@ -1,424 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -from ply import * -import basiclex - -tokens = basiclex.tokens - -precedence = ( - ('left', 'PLUS','MINUS'), - ('left', 'TIMES','DIVIDE'), - ('left', 'POWER'), - ('right','UMINUS') -) - -#### A BASIC program is a series of statements. We represent the program as a -#### dictionary of tuples indexed by line number. - -def p_program(p): - '''program : program statement - | statement''' - - if len(p) == 2 and p[1]: - p[0] = { } - line,stat = p[1] - p[0][line] = stat - elif len(p) ==3: - p[0] = p[1] - if not p[0]: p[0] = { } - if p[2]: - line,stat = p[2] - p[0][line] = stat - -#### This catch-all rule is used for any catastrophic errors. In this case, -#### we simply return nothing - -def p_program_error(p): - '''program : error''' - p[0] = None - p.parser.error = 1 - -#### Format of all BASIC statements. - -def p_statement(p): - '''statement : INTEGER command NEWLINE''' - if isinstance(p[2],str): - print("%s %s %s" % (p[2],"AT LINE", p[1])) - p[0] = None - p.parser.error = 1 - else: - lineno = int(p[1]) - p[0] = (lineno,p[2]) - -#### Interactive statements. 
- -def p_statement_interactive(p): - '''statement : RUN NEWLINE - | LIST NEWLINE - | NEW NEWLINE''' - p[0] = (0, (p[1],0)) - -#### Blank line number -def p_statement_blank(p): - '''statement : INTEGER NEWLINE''' - p[0] = (0,('BLANK',int(p[1]))) - -#### Error handling for malformed statements - -def p_statement_bad(p): - '''statement : INTEGER error NEWLINE''' - print("MALFORMED STATEMENT AT LINE %s" % p[1]) - p[0] = None - p.parser.error = 1 - -#### Blank line - -def p_statement_newline(p): - '''statement : NEWLINE''' - p[0] = None - -#### LET statement - -def p_command_let(p): - '''command : LET variable EQUALS expr''' - p[0] = ('LET',p[2],p[4]) - -def p_command_let_bad(p): - '''command : LET variable EQUALS error''' - p[0] = "BAD EXPRESSION IN LET" - -#### READ statement - -def p_command_read(p): - '''command : READ varlist''' - p[0] = ('READ',p[2]) - -def p_command_read_bad(p): - '''command : READ error''' - p[0] = "MALFORMED VARIABLE LIST IN READ" - -#### DATA statement - -def p_command_data(p): - '''command : DATA numlist''' - p[0] = ('DATA',p[2]) - -def p_command_data_bad(p): - '''command : DATA error''' - p[0] = "MALFORMED NUMBER LIST IN DATA" - -#### PRINT statement - -def p_command_print(p): - '''command : PRINT plist optend''' - p[0] = ('PRINT',p[2],p[3]) - -def p_command_print_bad(p): - '''command : PRINT error''' - p[0] = "MALFORMED PRINT STATEMENT" - -#### Optional ending on PRINT. Either a comma (,) or semicolon (;) - -def p_optend(p): - '''optend : COMMA - | SEMI - |''' - if len(p) == 2: - p[0] = p[1] - else: - p[0] = None - -#### PRINT statement with no arguments - -def p_command_print_empty(p): - '''command : PRINT''' - p[0] = ('PRINT',[],None) - -#### GOTO statement - -def p_command_goto(p): - '''command : GOTO INTEGER''' - p[0] = ('GOTO',int(p[2])) - -def p_command_goto_bad(p): - '''command : GOTO error''' - p[0] = "INVALID LINE NUMBER IN GOTO" - -#### IF-THEN statement - -def p_command_if(p): - '''command : IF relexpr THEN INTEGER''' - p[0] = ('IF',p[2],int(p[4])) - -def p_command_if_bad(p): - '''command : IF error THEN INTEGER''' - p[0] = "BAD RELATIONAL EXPRESSION" - -def p_command_if_bad2(p): - '''command : IF relexpr THEN error''' - p[0] = "INVALID LINE NUMBER IN THEN" - -#### FOR statement - -def p_command_for(p): - '''command : FOR ID EQUALS expr TO expr optstep''' - p[0] = ('FOR',p[2],p[4],p[6],p[7]) - -def p_command_for_bad_initial(p): - '''command : FOR ID EQUALS error TO expr optstep''' - p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_final(p): - '''command : FOR ID EQUALS expr TO error optstep''' - p[0] = "BAD FINAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_step(p): - '''command : FOR ID EQUALS expr TO expr STEP error''' - p[0] = "MALFORMED STEP IN FOR STATEMENT" - -#### Optional STEP qualifier on FOR statement - -def p_optstep(p): - '''optstep : STEP expr - | empty''' - if len(p) == 3: - p[0] = p[2] - else: - p[0] = None - -#### NEXT statement - -def p_command_next(p): - '''command : NEXT ID''' - - p[0] = ('NEXT',p[2]) - -def p_command_next_bad(p): - '''command : NEXT error''' - p[0] = "MALFORMED NEXT" - -#### END statement - -def p_command_end(p): - '''command : END''' - p[0] = ('END',) - -#### REM statement - -def p_command_rem(p): - '''command : REM''' - p[0] = ('REM',p[1]) - -#### STOP statement - -def p_command_stop(p): - '''command : STOP''' - p[0] = ('STOP',) - -#### DEF statement - -def p_command_def(p): - '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' - p[0] = ('FUNC',p[2],p[4],p[7]) - -def p_command_def_bad_rhs(p): 
- '''command : DEF ID LPAREN ID RPAREN EQUALS error''' - p[0] = "BAD EXPRESSION IN DEF STATEMENT" - -def p_command_def_bad_arg(p): - '''command : DEF ID LPAREN error RPAREN EQUALS expr''' - p[0] = "BAD ARGUMENT IN DEF STATEMENT" - -#### GOSUB statement - -def p_command_gosub(p): - '''command : GOSUB INTEGER''' - p[0] = ('GOSUB',int(p[2])) - -def p_command_gosub_bad(p): - '''command : GOSUB error''' - p[0] = "INVALID LINE NUMBER IN GOSUB" - -#### RETURN statement - -def p_command_return(p): - '''command : RETURN''' - p[0] = ('RETURN',) - -#### DIM statement - -def p_command_dim(p): - '''command : DIM dimlist''' - p[0] = ('DIM',p[2]) - -def p_command_dim_bad(p): - '''command : DIM error''' - p[0] = "MALFORMED VARIABLE LIST IN DIM" - -#### List of variables supplied to DIM statement - -def p_dimlist(p): - '''dimlist : dimlist COMMA dimitem - | dimitem''' - if len(p) == 4: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### DIM items - -def p_dimitem_single(p): - '''dimitem : ID LPAREN INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),0) - -def p_dimitem_double(p): - '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),eval(p[5])) - -#### Arithmetic expressions - -def p_expr_binary(p): - '''expr : expr PLUS expr - | expr MINUS expr - | expr TIMES expr - | expr DIVIDE expr - | expr POWER expr''' - - p[0] = ('BINOP',p[2],p[1],p[3]) - -def p_expr_number(p): - '''expr : INTEGER - | FLOAT''' - p[0] = ('NUM',eval(p[1])) - -def p_expr_variable(p): - '''expr : variable''' - p[0] = ('VAR',p[1]) - -def p_expr_group(p): - '''expr : LPAREN expr RPAREN''' - p[0] = ('GROUP',p[2]) - -def p_expr_unary(p): - '''expr : MINUS expr %prec UMINUS''' - p[0] = ('UNARY','-',p[2]) - -#### Relational expressions - -def p_relexpr(p): - '''relexpr : expr LT expr - | expr LE expr - | expr GT expr - | expr GE expr - | expr EQUALS expr - | expr NE expr''' - p[0] = ('RELOP',p[2],p[1],p[3]) - -#### Variables - -def p_variable(p): - '''variable : ID - | ID LPAREN expr RPAREN - | ID LPAREN expr COMMA expr RPAREN''' - if len(p) == 2: - p[0] = (p[1],None,None) - elif len(p) == 5: - p[0] = (p[1],p[3],None) - else: - p[0] = (p[1],p[3],p[5]) - -#### Builds a list of variable targets as a Python list - -def p_varlist(p): - '''varlist : varlist COMMA variable - | variable''' - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - - -#### Builds a list of numbers as a Python list - -def p_numlist(p): - '''numlist : numlist COMMA number - | number''' - - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### A number. May be an integer or a float - -def p_number(p): - '''number : INTEGER - | FLOAT''' - p[0] = eval(p[1]) - -#### A signed number. 
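Expressions and relational tests reduce to nested tuples that the interpreter's expr_str()/eval() methods later walk; as an illustration (operator spellings assumed to match the lexer), under the precedence table declared at the top of this grammar:

# "A + 2 * B"  parses to  ('BINOP', '+', ('VAR', ('A', None, None)),
#                          ('BINOP', '*', ('NUM', 2), ('VAR', ('B', None, None))))
# "I <= 10"    parses to  ('RELOP', '<=', ('VAR', ('I', None, None)), ('NUM', 10))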
- -def p_number_signed(p): - '''number : MINUS INTEGER - | MINUS FLOAT''' - p[0] = eval("-"+p[2]) - -#### List of targets for a print statement -#### Returns a list of tuples (label,expr) - -def p_plist(p): - '''plist : plist COMMA pitem - | pitem''' - if len(p) > 3: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -def p_item_string(p): - '''pitem : STRING''' - p[0] = (p[1][1:-1],None) - -def p_item_string_expr(p): - '''pitem : STRING expr''' - p[0] = (p[1][1:-1],p[2]) - -def p_item_expr(p): - '''pitem : expr''' - p[0] = ("",p[1]) - -#### Empty - -def p_empty(p): - '''empty : ''' - -#### Catastrophic error handler -def p_error(p): - if not p: - print("SYNTAX ERROR AT EOF") - -bparser = yacc.yacc() - -def parse(data,debug=0): - bparser.error = 0 - p = bparser.parse(data,debug=debug) - if bparser.error: return None - return p - - - - - - - - - - - - - - diff --git a/ply/example/BASIC/dim.bas b/ply/example/BASIC/dim.bas deleted file mode 100644 index 87bd95b..0000000 --- a/ply/example/BASIC/dim.bas +++ /dev/null @@ -1,14 +0,0 @@ -5 DIM A(50,15) -10 FOR I = 1 TO 50 -20 FOR J = 1 TO 15 -30 LET A(I,J) = I + J -35 REM PRINT I,J, A(I,J) -40 NEXT J -50 NEXT I -100 FOR I = 1 TO 50 -110 FOR J = 1 TO 15 -120 PRINT A(I,J), -130 NEXT J -140 PRINT -150 NEXT I -999 END diff --git a/ply/example/BASIC/func.bas b/ply/example/BASIC/func.bas deleted file mode 100644 index 447ee16..0000000 --- a/ply/example/BASIC/func.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 DEF FDX(X) = 2*X -20 FOR I = 0 TO 100 -30 PRINT FDX(I) -40 NEXT I -50 END diff --git a/ply/example/BASIC/gcd.bas b/ply/example/BASIC/gcd.bas deleted file mode 100644 index d0b7746..0000000 --- a/ply/example/BASIC/gcd.bas +++ /dev/null @@ -1,22 +0,0 @@ -10 PRINT "A","B","C","GCD" -20 READ A,B,C -30 LET X = A -40 LET Y = B -50 GOSUB 200 -60 LET X = G -70 LET Y = C -80 GOSUB 200 -90 PRINT A, B, C, G -100 GOTO 20 -110 DATA 60, 90, 120 -120 DATA 38456, 64872, 98765 -130 DATA 32, 384, 72 -200 LET Q = INT(X/Y) -210 LET R = X - Q*Y -220 IF R = 0 THEN 300 -230 LET X = Y -240 LET Y = R -250 GOTO 200 -300 LET G = Y -310 RETURN -999 END diff --git a/ply/example/BASIC/gosub.bas b/ply/example/BASIC/gosub.bas deleted file mode 100644 index 99737b1..0000000 --- a/ply/example/BASIC/gosub.bas +++ /dev/null @@ -1,13 +0,0 @@ -100 LET X = 3 -110 GOSUB 400 -120 PRINT U, V, W -200 LET X = 5 -210 GOSUB 400 -220 LET Z = U + 2*V + 3*W -230 PRINT Z -240 GOTO 999 -400 LET U = X*X -410 LET V = X*X*X -420 LET W = X*X*X*X + X*X*X + X*X + X -430 RETURN -999 END diff --git a/ply/example/BASIC/hello.bas b/ply/example/BASIC/hello.bas deleted file mode 100644 index cc6f0b0..0000000 --- a/ply/example/BASIC/hello.bas +++ /dev/null @@ -1,4 +0,0 @@ -5 REM HELLO WORLD PROGAM -10 PRINT "HELLO WORLD" -99 END - diff --git a/ply/example/BASIC/linear.bas b/ply/example/BASIC/linear.bas deleted file mode 100644 index 56c0822..0000000 --- a/ply/example/BASIC/linear.bas +++ /dev/null @@ -1,17 +0,0 @@ -1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS -2 REM ::: A1*X1 + A2*X2 = B1 -3 REM ::: A3*X1 + A4*X2 = B2 -4 REM -------------------------------------- -10 READ A1, A2, A3, A4 -15 LET D = A1 * A4 - A3 * A2 -20 IF D = 0 THEN 65 -30 READ B1, B2 -37 LET X1 = (B1*A4 - B2*A2) / D -42 LET X2 = (A1*B2 - A3*B1) / D -55 PRINT X1, X2 -60 GOTO 30 -65 PRINT "NO UNIQUE SOLUTION" -70 DATA 1, 2, 4 -80 DATA 2, -7, 5 -85 DATA 1, 3, 4, -7 -90 END diff --git a/ply/example/BASIC/maxsin.bas b/ply/example/BASIC/maxsin.bas deleted file mode 100644 index b969015..0000000 --- a/ply/example/BASIC/maxsin.bas +++ /dev/null @@ 
-1,12 +0,0 @@ -5 PRINT "X VALUE", "SINE", "RESOLUTION" -10 READ D -20 LET M = -1 -30 FOR X = 0 TO 3 STEP D -40 IF SIN(X) <= M THEN 80 -50 LET X0 = X -60 LET M = SIN(X) -80 NEXT X -85 PRINT X0, M, D -90 GOTO 10 -100 DATA .1, .01, .001 -110 END diff --git a/ply/example/BASIC/powers.bas b/ply/example/BASIC/powers.bas deleted file mode 100644 index a454dc3..0000000 --- a/ply/example/BASIC/powers.bas +++ /dev/null @@ -1,13 +0,0 @@ -5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS" -6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS" -7 PRINT "N FROM 1 THROUGH 7" -8 PRINT -10 FOR N = 1 TO 7 -15 PRINT "N = "N -20 FOR I = 1 TO N -30 PRINT I^N, -40 NEXT I -50 PRINT -60 PRINT -70 NEXT N -80 END diff --git a/ply/example/BASIC/rand.bas b/ply/example/BASIC/rand.bas deleted file mode 100644 index 4ff7a14..0000000 --- a/ply/example/BASIC/rand.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR I = 1 TO 20 -20 PRINT INT(10*RND(0)) -30 NEXT I -40 END diff --git a/ply/example/BASIC/sales.bas b/ply/example/BASIC/sales.bas deleted file mode 100644 index a39aefb..0000000 --- a/ply/example/BASIC/sales.bas +++ /dev/null @@ -1,20 +0,0 @@ -10 FOR I = 1 TO 3 -20 READ P(I) -30 NEXT I -40 FOR I = 1 TO 3 -50 FOR J = 1 TO 5 -60 READ S(I,J) -70 NEXT J -80 NEXT I -90 FOR J = 1 TO 5 -100 LET S = 0 -110 FOR I = 1 TO 3 -120 LET S = S + P(I) * S(I,J) -130 NEXT I -140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S -150 NEXT J -200 DATA 1.25, 4.30, 2.50 -210 DATA 40, 20, 37, 29, 42 -220 DATA 10, 16, 3, 21, 8 -230 DATA 35, 47, 29, 16, 33 -300 END diff --git a/ply/example/BASIC/sears.bas b/ply/example/BASIC/sears.bas deleted file mode 100644 index 5ced397..0000000 --- a/ply/example/BASIC/sears.bas +++ /dev/null @@ -1,18 +0,0 @@ -1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD -2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE -3 REM :: SEARS TOWER. -4 REM :: S = HEIGHT OF TOWER (METERS) -5 REM :: T = THICKNESS OF PAPER (MILLIMETERS) -10 LET S = 442 -20 LET T = 0.1 -30 REM CONVERT T TO METERS -40 LET T = T * .001 -50 LET F = 1 -60 LET H = T -100 IF H > S THEN 200 -120 LET H = 2 * H -125 LET F = F + 1 -130 GOTO 100 -200 PRINT "NUMBER OF FOLDS ="F -220 PRINT "FINAL HEIGHT ="H -999 END diff --git a/ply/example/BASIC/sqrt1.bas b/ply/example/BASIC/sqrt1.bas deleted file mode 100644 index 6673a91..0000000 --- a/ply/example/BASIC/sqrt1.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 LET X = 0 -20 LET X = X + 1 -30 PRINT X, SQR(X) -40 IF X < 100 THEN 20 -50 END diff --git a/ply/example/BASIC/sqrt2.bas b/ply/example/BASIC/sqrt2.bas deleted file mode 100644 index 862d85e..0000000 --- a/ply/example/BASIC/sqrt2.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR X = 1 TO 100 -20 PRINT X, SQR(X) -30 NEXT X -40 END diff --git a/ply/example/GardenSnake/GardenSnake.py b/ply/example/GardenSnake/GardenSnake.py deleted file mode 100644 index 2a7f45e..0000000 --- a/ply/example/GardenSnake/GardenSnake.py +++ /dev/null @@ -1,709 +0,0 @@ -# GardenSnake - a parser generator demonstration program -# -# This implements a modified version of a subset of Python: -# - only 'def', 'return' and 'if' statements -# - 'if' only has 'then' clause (no elif nor else) -# - single-quoted strings only, content in raw format -# - numbers are decimal.Decimal instances (not integers or floats) -# - no print statment; use the built-in 'print' function -# - only < > == + - / * implemented (and unary + -) -# - assignment and tuple assignment work -# - no generators of any sort -# - no ... well, no quite a lot - -# Why? 
I'm thinking about a new indentation-based configuration -# language for a project and wanted to figure out how to do it. Once -# I got that working I needed a way to test it out. My original AST -# was dumb so I decided to target Python's AST and compile it into -# Python code. Plus, it's pretty cool that it only took a day or so -# from sitting down with Ply to having working code. - -# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ - -# This work is hereby released into the Public Domain. To view a copy of -# the public domain dedication, visit -# http://creativecommons.org/licenses/publicdomain/ or send a letter to -# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, -# California, 94105, USA. -# -# Portions of this work are derived from Python's Grammar definition -# and may be covered under the Python copyright and license -# -# Andrew Dalke / Dalke Scientific Software, LLC -# 30 August 2006 / Cape Town, South Africa - -# Changelog: -# 30 August - added link to CC license; removed the "swapcase" encoding - -# Modifications for inclusion in PLY distribution -import sys -sys.path.insert(0,"../..") -from ply import * - -##### Lexer ###### -#import lex -import decimal - -tokens = ( - 'DEF', - 'IF', - 'NAME', - 'NUMBER', # Python decimals - 'STRING', # single quoted strings only; syntax of raw strings - 'LPAR', - 'RPAR', - 'COLON', - 'EQ', - 'ASSIGN', - 'LT', - 'GT', - 'PLUS', - 'MINUS', - 'MULT', - 'DIV', - 'RETURN', - 'WS', - 'NEWLINE', - 'COMMA', - 'SEMICOLON', - 'INDENT', - 'DEDENT', - 'ENDMARKER', - ) - -#t_NUMBER = r'\d+' -# taken from decmial.py but without the leading sign -def t_NUMBER(t): - r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" - t.value = decimal.Decimal(t.value) - return t - -def t_STRING(t): - r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... - t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun - return t - -t_COLON = r':' -t_EQ = r'==' -t_ASSIGN = r'=' -t_LT = r'<' -t_GT = r'>' -t_PLUS = r'\+' -t_MINUS = r'-' -t_MULT = r'\*' -t_DIV = r'/' -t_COMMA = r',' -t_SEMICOLON = r';' - -# Ply nicely documented how to do this. - -RESERVED = { - "def": "DEF", - "if": "IF", - "return": "RETURN", - } - -def t_NAME(t): - r'[a-zA-Z_][a-zA-Z0-9_]*' - t.type = RESERVED.get(t.value, "NAME") - return t - -# Putting this before t_WS let it consume lines with only comments in -# them so the latter code never sees the WS part. Not consuming the -# newline. Needed for "if 1: #comment" -def t_comment(t): - r"[ ]*\043[^\n]*" # \043 is '#' - pass - - -# Whitespace -def t_WS(t): - r' [ ]+ ' - if t.lexer.at_line_start and t.lexer.paren_count == 0: - return t - -# Don't generate newline tokens when inside of parenthesis, eg -# a = (1, -# 2, 3) -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - t.type = "NEWLINE" - if t.lexer.paren_count == 0: - return t - -def t_LPAR(t): - r'\(' - t.lexer.paren_count += 1 - return t - -def t_RPAR(t): - r'\)' - # check for underflow? should be the job of the parser - t.lexer.paren_count -= 1 - return t - - -def t_error(t): - raise SyntaxError("Unknown symbol %r" % (t.value[0],)) - print "Skipping", repr(t.value[0]) - t.lexer.skip(1) - -## I implemented INDENT / DEDENT generation as a post-processing filter - -# The original lex token stream contains WS and NEWLINE characters. -# WS will only occur before any other tokens on a line. - -# I have three filters. One tags tokens by adding two attributes. -# "must_indent" is True if the token must be indented from the -# previous code. 
The other is "at_line_start" which is True for WS -# and the first non-WS/non-NEWLINE on a line. It flags the check so -# see if the new line has changed indication level. - -# Python's syntax has three INDENT states -# 0) no colon hence no need to indent -# 1) "if 1: go()" - simple statements have a COLON but no need for an indent -# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent -NO_INDENT = 0 -MAY_INDENT = 1 -MUST_INDENT = 2 - -# only care about whitespace at the start of a line -def track_tokens_filter(lexer, tokens): - lexer.at_line_start = at_line_start = True - indent = NO_INDENT - saw_colon = False - for token in tokens: - token.at_line_start = at_line_start - - if token.type == "COLON": - at_line_start = False - indent = MAY_INDENT - token.must_indent = False - - elif token.type == "NEWLINE": - at_line_start = True - if indent == MAY_INDENT: - indent = MUST_INDENT - token.must_indent = False - - elif token.type == "WS": - assert token.at_line_start == True - at_line_start = True - token.must_indent = False - - else: - # A real token; only indent after COLON NEWLINE - if indent == MUST_INDENT: - token.must_indent = True - else: - token.must_indent = False - at_line_start = False - indent = NO_INDENT - - yield token - lexer.at_line_start = at_line_start - -def _new_token(type, lineno): - tok = lex.LexToken() - tok.type = type - tok.value = None - tok.lineno = lineno - return tok - -# Synthesize a DEDENT tag -def DEDENT(lineno): - return _new_token("DEDENT", lineno) - -# Synthesize an INDENT tag -def INDENT(lineno): - return _new_token("INDENT", lineno) - - -# Track the indentation level and emit the right INDENT / DEDENT events. -def indentation_filter(tokens): - # A stack of indentation levels; will never pop item 0 - levels = [0] - token = None - depth = 0 - prev_was_ws = False - for token in tokens: -## if 1: -## print "Process", token, -## if token.at_line_start: -## print "at_line_start", -## if token.must_indent: -## print "must_indent", -## print - - # WS only occurs at the start of the line - # There may be WS followed by NEWLINE so - # only track the depth here. Don't indent/dedent - # until there's something real. 
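# Worked example (illustrative): for the two-line input
#     if x:
#         return 1
# the combined filters should yield roughly
#     IF NAME COLON NEWLINE INDENT RETURN NUMBER NEWLINE DEDENT ENDMARKER
# i.e. the WS token is consumed here and replaced by synthetic INDENT/DEDENT tokens.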
- if token.type == "WS": - assert depth == 0 - depth = len(token.value) - prev_was_ws = True - # WS tokens are never passed to the parser - continue - - if token.type == "NEWLINE": - depth = 0 - if prev_was_ws or token.at_line_start: - # ignore blank lines - continue - # pass the other cases on through - yield token - continue - - # then it must be a real token (not WS, not NEWLINE) - # which can affect the indentation level - - prev_was_ws = False - if token.must_indent: - # The current depth must be larger than the previous level - if not (depth > levels[-1]): - raise IndentationError("expected an indented block") - - levels.append(depth) - yield INDENT(token.lineno) - - elif token.at_line_start: - # Must be on the same level or one of the previous levels - if depth == levels[-1]: - # At the same level - pass - elif depth > levels[-1]: - raise IndentationError("indentation increase but not in new block") - else: - # Back up; but only if it matches a previous level - try: - i = levels.index(depth) - except ValueError: - raise IndentationError("inconsistent indentation") - for _ in range(i+1, len(levels)): - yield DEDENT(token.lineno) - levels.pop() - - yield token - - ### Finished processing ### - - # Must dedent any remaining levels - if len(levels) > 1: - assert token is not None - for _ in range(1, len(levels)): - yield DEDENT(token.lineno) - - -# The top-level filter adds an ENDMARKER, if requested. -# Python's grammar uses it. -def filter(lexer, add_endmarker = True): - token = None - tokens = iter(lexer.token, None) - tokens = track_tokens_filter(lexer, tokens) - for token in indentation_filter(tokens): - yield token - - if add_endmarker: - lineno = 1 - if token is not None: - lineno = token.lineno - yield _new_token("ENDMARKER", lineno) - -# Combine Ply and my filters into a new lexer - -class IndentLexer(object): - def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): - self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags) - self.token_stream = None - def input(self, s, add_endmarker=True): - self.lexer.paren_count = 0 - self.lexer.input(s) - self.token_stream = filter(self.lexer, add_endmarker) - def token(self): - try: - return self.token_stream.next() - except StopIteration: - return None - -########## Parser (tokens -> AST) ###### - -# also part of Ply -#import yacc - -# I use the Python AST -from compiler import ast - -# Helper function -def Assign(left, right): - names = [] - if isinstance(left, ast.Name): - # Single assignment on left - return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) - elif isinstance(left, ast.Tuple): - # List of things - make sure they are Name nodes - names = [] - for child in left.getChildren(): - if not isinstance(child, ast.Name): - raise SyntaxError("that assignment not supported") - names.append(child.name) - ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] - return ast.Assign([ast.AssTuple(ass_list)], right) - else: - raise SyntaxError("Can't do that yet") - - -# The grammar comments come from Python's Grammar/Grammar file - -## NB: compound_stmt in single_input is followed by extra NEWLINE! 
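Before the grammar itself, a minimal sketch of driving the combined lexer by hand (only the IndentLexer methods defined above are assumed):

lexer = IndentLexer()
lexer.input("if x:\n    return 1\n")
tok = lexer.token()
while tok:
    print(tok.type)      # one token type per line, synthetic INDENT/DEDENT included
    tok = lexer.token()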
-# file_input: (NEWLINE | stmt)* ENDMARKER -def p_file_input_end(p): - """file_input_end : file_input ENDMARKER""" - p[0] = ast.Stmt(p[1]) -def p_file_input(p): - """file_input : file_input NEWLINE - | file_input stmt - | NEWLINE - | stmt""" - if isinstance(p[len(p)-1], basestring): - if len(p) == 3: - p[0] = p[1] - else: - p[0] = [] # p == 2 --> only a blank line - else: - if len(p) == 3: - p[0] = p[1] + p[2] - else: - p[0] = p[1] - - -# funcdef: [decorators] 'def' NAME parameters ':' suite -# ignoring decorators -def p_funcdef(p): - "funcdef : DEF NAME parameters COLON suite" - p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) - -# parameters: '(' [varargslist] ')' -def p_parameters(p): - """parameters : LPAR RPAR - | LPAR varargslist RPAR""" - if len(p) == 3: - p[0] = [] - else: - p[0] = p[2] - - -# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | -# highly simplified -def p_varargslist(p): - """varargslist : varargslist COMMA NAME - | NAME""" - if len(p) == 4: - p[0] = p[1] + p[3] - else: - p[0] = [p[1]] - -# stmt: simple_stmt | compound_stmt -def p_stmt_simple(p): - """stmt : simple_stmt""" - # simple_stmt is a list - p[0] = p[1] - -def p_stmt_compound(p): - """stmt : compound_stmt""" - p[0] = [p[1]] - -# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -def p_simple_stmt(p): - """simple_stmt : small_stmts NEWLINE - | small_stmts SEMICOLON NEWLINE""" - p[0] = p[1] - -def p_small_stmts(p): - """small_stmts : small_stmts SEMICOLON small_stmt - | small_stmt""" - if len(p) == 4: - p[0] = p[1] + [p[3]] - else: - p[0] = [p[1]] - -# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | -# import_stmt | global_stmt | exec_stmt | assert_stmt -def p_small_stmt(p): - """small_stmt : flow_stmt - | expr_stmt""" - p[0] = p[1] - -# expr_stmt: testlist (augassign (yield_expr|testlist) | -# ('=' (yield_expr|testlist))*) -# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | -# '<<=' | '>>=' | '**=' | '//=') -def p_expr_stmt(p): - """expr_stmt : testlist ASSIGN testlist - | testlist """ - if len(p) == 2: - # a list of expressions - p[0] = ast.Discard(p[1]) - else: - p[0] = Assign(p[1], p[3]) - -def p_flow_stmt(p): - "flow_stmt : return_stmt" - p[0] = p[1] - -# return_stmt: 'return' [testlist] -def p_return_stmt(p): - "return_stmt : RETURN testlist" - p[0] = ast.Return(p[2]) - - -def p_compound_stmt(p): - """compound_stmt : if_stmt - | funcdef""" - p[0] = p[1] - -def p_if_stmt(p): - 'if_stmt : IF test COLON suite' - p[0] = ast.If([(p[2], p[4])], None) - -def p_suite(p): - """suite : simple_stmt - | NEWLINE INDENT stmts DEDENT""" - if len(p) == 2: - p[0] = ast.Stmt(p[1]) - else: - p[0] = ast.Stmt(p[3]) - - -def p_stmts(p): - """stmts : stmts stmt - | stmt""" - if len(p) == 3: - p[0] = p[1] + p[2] - else: - p[0] = p[1] - -## No using Python's approach because Ply supports precedence - -# comparison: expr (comp_op expr)* -# arith_expr: term (('+'|'-') term)* -# term: factor (('*'|'/'|'%'|'//') factor)* -# factor: ('+'|'-'|'~') factor | power -# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' - -def make_lt_compare((left, right)): - return ast.Compare(left, [('<', right),]) -def make_gt_compare((left, right)): - return ast.Compare(left, [('>', right),]) -def make_eq_compare((left, right)): - return ast.Compare(left, [('==', right),]) - - -binary_ops = { - "+": ast.Add, - "-": ast.Sub, - "*": ast.Mul, - "/": ast.Div, - "<": make_lt_compare, - ">": make_gt_compare, - "==": make_eq_compare, -} 
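# Worked example (illustrative): the comparison rule below indexes into these tables,
# so under the precedence declared next, "a + b < c" builds roughly
#     ast.Compare(ast.Add((ast.Name('a'), ast.Name('b'))), [('<', ast.Name('c'))])
# because PLUS binds tighter than LT and "<" maps to make_lt_compare above.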
-unary_ops = { - "+": ast.UnaryAdd, - "-": ast.UnarySub, - } -precedence = ( - ("left", "EQ", "GT", "LT"), - ("left", "PLUS", "MINUS"), - ("left", "MULT", "DIV"), - ) - -def p_comparison(p): - """comparison : comparison PLUS comparison - | comparison MINUS comparison - | comparison MULT comparison - | comparison DIV comparison - | comparison LT comparison - | comparison EQ comparison - | comparison GT comparison - | PLUS comparison - | MINUS comparison - | power""" - if len(p) == 4: - p[0] = binary_ops[p[2]]((p[1], p[3])) - elif len(p) == 3: - p[0] = unary_ops[p[1]](p[2]) - else: - p[0] = p[1] - -# power: atom trailer* ['**' factor] -# trailers enables function calls. I only allow one level of calls -# so this is 'trailer' -def p_power(p): - """power : atom - | atom trailer""" - if len(p) == 2: - p[0] = p[1] - else: - if p[2][0] == "CALL": - p[0] = ast.CallFunc(p[1], p[2][1], None, None) - else: - raise AssertionError("not implemented") - -def p_atom_name(p): - """atom : NAME""" - p[0] = ast.Name(p[1]) - -def p_atom_number(p): - """atom : NUMBER - | STRING""" - p[0] = ast.Const(p[1]) - -def p_atom_tuple(p): - """atom : LPAR testlist RPAR""" - p[0] = p[2] - -# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -def p_trailer(p): - "trailer : LPAR arglist RPAR" - p[0] = ("CALL", p[2]) - -# testlist: test (',' test)* [','] -# Contains shift/reduce error -def p_testlist(p): - """testlist : testlist_multi COMMA - | testlist_multi """ - if len(p) == 2: - p[0] = p[1] - else: - # May need to promote singleton to tuple - if isinstance(p[1], list): - p[0] = p[1] - else: - p[0] = [p[1]] - # Convert into a tuple? - if isinstance(p[0], list): - p[0] = ast.Tuple(p[0]) - -def p_testlist_multi(p): - """testlist_multi : testlist_multi COMMA test - | test""" - if len(p) == 2: - # singleton - p[0] = p[1] - else: - if isinstance(p[1], list): - p[0] = p[1] + [p[3]] - else: - # singleton -> tuple - p[0] = [p[1], p[3]] - - -# test: or_test ['if' or_test 'else' test] | lambdef -# as I don't support 'and', 'or', and 'not' this works down to 'comparison' -def p_test(p): - "test : comparison" - p[0] = p[1] - - - -# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) -# XXX INCOMPLETE: this doesn't allow the trailing comma -def p_arglist(p): - """arglist : arglist COMMA argument - | argument""" - if len(p) == 4: - p[0] = p[1] + [p[3]] - else: - p[0] = [p[1]] - -# argument: test [gen_for] | test '=' test # Really [keyword '='] test -def p_argument(p): - "argument : test" - p[0] = p[1] - -def p_error(p): - #print "Error!", repr(p) - raise SyntaxError(p) - - -class GardenSnakeParser(object): - def __init__(self, lexer = None): - if lexer is None: - lexer = IndentLexer() - self.lexer = lexer - self.parser = yacc.yacc(start="file_input_end") - - def parse(self, code): - self.lexer.input(code) - result = self.parser.parse(lexer = self.lexer) - return ast.Module(None, result) - - -###### Code generation ###### - -from compiler import misc, syntax, pycodegen - -class GardenSnakeCompiler(object): - def __init__(self): - self.parser = GardenSnakeParser() - def compile(self, code, filename=""): - tree = self.parser.parse(code) - #print tree - misc.set_filename(filename, tree) - syntax.check(tree) - gen = pycodegen.ModuleCodeGenerator(tree) - code = gen.getCode() - return code - -####### Test code ####### - -compile = GardenSnakeCompiler().compile - -code = r""" - -print('LET\'S TRY THIS \\OUT') - -#Comment here -def x(a): - print('called with',a) - if a == 1: - return 2 - if a*2 > 10: 
return 999 / 4 - # Another comment here - - return a+2*3 - -ints = (1, 2, - 3, 4, -5) -print('mutiline-expression', ints) - -t = 4+1/3*2+6*(9-5+1) -print('predence test; should be 34+2/3:', t, t==(34+2/3)) - -print('numbers', 1,2,3,4,5) -if 1: - 8 - a=9 - print(x(a)) - -print(x(1)) -print(x(2)) -print(x(8),'3') -print('this is decimal', 1/5) -print('BIG DECIMAL', 1.234567891234567e12345) - -""" - -# Set up the GardenSnake run-time environment -def print_(*args): - print "-->", " ".join(map(str,args)) - -globals()["print"] = print_ - -compiled_code = compile(code) - -exec compiled_code in globals() -print "Done" diff --git a/ply/example/GardenSnake/README b/ply/example/GardenSnake/README deleted file mode 100644 index 4d8be2d..0000000 --- a/ply/example/GardenSnake/README +++ /dev/null @@ -1,5 +0,0 @@ -This example is Andrew Dalke's GardenSnake language. It shows how to process an -indentation-like language like Python. Further details can be found here: - -http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html - diff --git a/ply/example/README b/ply/example/README deleted file mode 100644 index 63519b5..0000000 --- a/ply/example/README +++ /dev/null @@ -1,10 +0,0 @@ -Simple examples: - calc - Simple calculator - classcalc - Simple calculate defined as a class - -Complex examples - ansic - ANSI C grammar from K&R - BASIC - A small BASIC interpreter - GardenSnake - A simple python-like language - yply - Converts Unix yacc files to PLY programs. - diff --git a/ply/example/ansic/README b/ply/example/ansic/README deleted file mode 100644 index e049d3b..0000000 --- a/ply/example/ansic/README +++ /dev/null @@ -1,2 +0,0 @@ -This example is incomplete. Was going to specify an ANSI C parser. -This is part of it. diff --git a/ply/example/ansic/clex.py b/ply/example/ansic/clex.py deleted file mode 100644 index 37fdd8e..0000000 --- a/ply/example/ansic/clex.py +++ /dev/null @@ -1,164 +0,0 @@ -# ---------------------------------------------------------------------- -# clex.py -# -# A lexer for ANSI C. -# ---------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -import ply.lex as lex - -# Reserved words -reserved = ( - 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', - 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', - 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', - 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', - ) - -tokens = reserved + ( - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', - - # Structure dereference (->) - 'ARROW', - - # Conditional operator (?) - 'CONDOP', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) 
- 'ELLIPSIS', - ) - -# Completely ignored characters -t_ignore = ' \t\x0c' - -# Newlines -def t_NEWLINE(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MOD = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' - -# Increment/decrement -t_PLUSPLUS = r'\+\+' -t_MINUSMINUS = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_CONDOP = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers and reserved words - -reserved_map = { } -for r in reserved: - reserved_map[r.lower()] = r - -def t_ID(t): - r'[A-Za-z_][\w_]*' - t.type = reserved_map.get(t.value,"ID") - return t - -# Integer literal -t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_SCONST = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comments -def t_comment(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - -# Preprocessor directive (ignored) -def t_preprocessor(t): - r'\#(.)*?\n' - t.lexer.lineno += 1 - -def t_error(t): - print("Illegal character %s" % repr(t.value[0])) - t.lexer.skip(1) - -lexer = lex.lex(optimize=1) -if __name__ == "__main__": - lex.runmain(lexer) - - - - - diff --git a/ply/example/ansic/cparse.py b/ply/example/ansic/cparse.py deleted file mode 100644 index c9b9164..0000000 --- a/ply/example/ansic/cparse.py +++ /dev/null @@ -1,863 +0,0 @@ -# ----------------------------------------------------------------------------- -# cparse.py -# -# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. 
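As a quick illustration of the lexer defined in clex.py (the module-level lexer object is created there with lex.lex(optimize=1); the test input below is only an example):

import clex
clex.lexer.input("int x = a->b + 1;")
for tok in iter(clex.lexer.token, None):
    print(tok.type, tok.value)
# expected types: INT ID EQUALS ID ARROW ID PLUS ICONST SEMI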
-# ----------------------------------------------------------------------------- - -import sys -import clex -import ply.yacc as yacc - -# Get the token map -tokens = clex.tokens - -# translation-unit: - -def p_translation_unit_1(t): - 'translation_unit : external_declaration' - pass - -def p_translation_unit_2(t): - 'translation_unit : translation_unit external_declaration' - pass - -# external-declaration: - -def p_external_declaration_1(t): - 'external_declaration : function_definition' - pass - -def p_external_declaration_2(t): - 'external_declaration : declaration' - pass - -# function-definition: - -def p_function_definition_1(t): - 'function_definition : declaration_specifiers declarator declaration_list compound_statement' - pass - -def p_function_definition_2(t): - 'function_definition : declarator declaration_list compound_statement' - pass - -def p_function_definition_3(t): - 'function_definition : declarator compound_statement' - pass - -def p_function_definition_4(t): - 'function_definition : declaration_specifiers declarator compound_statement' - pass - -# declaration: - -def p_declaration_1(t): - 'declaration : declaration_specifiers init_declarator_list SEMI' - pass - -def p_declaration_2(t): - 'declaration : declaration_specifiers SEMI' - pass - -# declaration-list: - -def p_declaration_list_1(t): - 'declaration_list : declaration' - pass - -def p_declaration_list_2(t): - 'declaration_list : declaration_list declaration ' - pass - -# declaration-specifiers -def p_declaration_specifiers_1(t): - 'declaration_specifiers : storage_class_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_2(t): - 'declaration_specifiers : type_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_3(t): - 'declaration_specifiers : type_qualifier declaration_specifiers' - pass - -def p_declaration_specifiers_4(t): - 'declaration_specifiers : storage_class_specifier' - pass - -def p_declaration_specifiers_5(t): - 'declaration_specifiers : type_specifier' - pass - -def p_declaration_specifiers_6(t): - 'declaration_specifiers : type_qualifier' - pass - -# storage-class-specifier -def p_storage_class_specifier(t): - '''storage_class_specifier : AUTO - | REGISTER - | STATIC - | EXTERN - | TYPEDEF - ''' - pass - -# type-specifier: -def p_type_specifier(t): - '''type_specifier : VOID - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | SIGNED - | UNSIGNED - | struct_or_union_specifier - | enum_specifier - | TYPEID - ''' - pass - -# type-qualifier: -def p_type_qualifier(t): - '''type_qualifier : CONST - | VOLATILE''' - pass - -# struct-or-union-specifier - -def p_struct_or_union_specifier_1(t): - 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_2(t): - 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_3(t): - 'struct_or_union_specifier : struct_or_union ID' - pass - -# struct-or-union: -def p_struct_or_union(t): - '''struct_or_union : STRUCT - | UNION - ''' - pass - -# struct-declaration-list: - -def p_struct_declaration_list_1(t): - 'struct_declaration_list : struct_declaration' - pass - -def p_struct_declaration_list_2(t): - 'struct_declaration_list : struct_declaration_list struct_declaration' - pass - -# init-declarator-list: - -def p_init_declarator_list_1(t): - 'init_declarator_list : init_declarator' - pass - -def p_init_declarator_list_2(t): - 'init_declarator_list : init_declarator_list COMMA 
init_declarator' - pass - -# init-declarator - -def p_init_declarator_1(t): - 'init_declarator : declarator' - pass - -def p_init_declarator_2(t): - 'init_declarator : declarator EQUALS initializer' - pass - -# struct-declaration: - -def p_struct_declaration(t): - 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' - pass - -# specifier-qualifier-list: - -def p_specifier_qualifier_list_1(t): - 'specifier_qualifier_list : type_specifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_2(t): - 'specifier_qualifier_list : type_specifier' - pass - -def p_specifier_qualifier_list_3(t): - 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_4(t): - 'specifier_qualifier_list : type_qualifier' - pass - -# struct-declarator-list: - -def p_struct_declarator_list_1(t): - 'struct_declarator_list : struct_declarator' - pass - -def p_struct_declarator_list_2(t): - 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' - pass - -# struct-declarator: - -def p_struct_declarator_1(t): - 'struct_declarator : declarator' - pass - -def p_struct_declarator_2(t): - 'struct_declarator : declarator COLON constant_expression' - pass - -def p_struct_declarator_3(t): - 'struct_declarator : COLON constant_expression' - pass - -# enum-specifier: - -def p_enum_specifier_1(t): - 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_2(t): - 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_3(t): - 'enum_specifier : ENUM ID' - pass - -# enumerator_list: -def p_enumerator_list_1(t): - 'enumerator_list : enumerator' - pass - -def p_enumerator_list_2(t): - 'enumerator_list : enumerator_list COMMA enumerator' - pass - -# enumerator: -def p_enumerator_1(t): - 'enumerator : ID' - pass - -def p_enumerator_2(t): - 'enumerator : ID EQUALS constant_expression' - pass - -# declarator: - -def p_declarator_1(t): - 'declarator : pointer direct_declarator' - pass - -def p_declarator_2(t): - 'declarator : direct_declarator' - pass - -# direct-declarator: - -def p_direct_declarator_1(t): - 'direct_declarator : ID' - pass - -def p_direct_declarator_2(t): - 'direct_declarator : LPAREN declarator RPAREN' - pass - -def p_direct_declarator_3(t): - 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_declarator_4(t): - 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' - pass - -def p_direct_declarator_5(t): - 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' - pass - -def p_direct_declarator_6(t): - 'direct_declarator : direct_declarator LPAREN RPAREN ' - pass - -# pointer: -def p_pointer_1(t): - 'pointer : TIMES type_qualifier_list' - pass - -def p_pointer_2(t): - 'pointer : TIMES' - pass - -def p_pointer_3(t): - 'pointer : TIMES type_qualifier_list pointer' - pass - -def p_pointer_4(t): - 'pointer : TIMES pointer' - pass - -# type-qualifier-list: - -def p_type_qualifier_list_1(t): - 'type_qualifier_list : type_qualifier' - pass - -def p_type_qualifier_list_2(t): - 'type_qualifier_list : type_qualifier_list type_qualifier' - pass - -# parameter-type-list: - -def p_parameter_type_list_1(t): - 'parameter_type_list : parameter_list' - pass - -def p_parameter_type_list_2(t): - 'parameter_type_list : parameter_list COMMA ELLIPSIS' - pass - -# parameter-list: - -def p_parameter_list_1(t): - 'parameter_list : parameter_declaration' - pass - -def p_parameter_list_2(t): - 
'parameter_list : parameter_list COMMA parameter_declaration' - pass - -# parameter-declaration: -def p_parameter_declaration_1(t): - 'parameter_declaration : declaration_specifiers declarator' - pass - -def p_parameter_declaration_2(t): - 'parameter_declaration : declaration_specifiers abstract_declarator_opt' - pass - -# identifier-list: -def p_identifier_list_1(t): - 'identifier_list : ID' - pass - -def p_identifier_list_2(t): - 'identifier_list : identifier_list COMMA ID' - pass - -# initializer: - -def p_initializer_1(t): - 'initializer : assignment_expression' - pass - -def p_initializer_2(t): - '''initializer : LBRACE initializer_list RBRACE - | LBRACE initializer_list COMMA RBRACE''' - pass - -# initializer-list: - -def p_initializer_list_1(t): - 'initializer_list : initializer' - pass - -def p_initializer_list_2(t): - 'initializer_list : initializer_list COMMA initializer' - pass - -# type-name: - -def p_type_name(t): - 'type_name : specifier_qualifier_list abstract_declarator_opt' - pass - -def p_abstract_declarator_opt_1(t): - 'abstract_declarator_opt : empty' - pass - -def p_abstract_declarator_opt_2(t): - 'abstract_declarator_opt : abstract_declarator' - pass - -# abstract-declarator: - -def p_abstract_declarator_1(t): - 'abstract_declarator : pointer ' - pass - -def p_abstract_declarator_2(t): - 'abstract_declarator : pointer direct_abstract_declarator' - pass - -def p_abstract_declarator_3(t): - 'abstract_declarator : direct_abstract_declarator' - pass - -# direct-abstract-declarator: - -def p_direct_abstract_declarator_1(t): - 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' - pass - -def p_direct_abstract_declarator_2(t): - 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_3(t): - 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_4(t): - 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' - pass - -def p_direct_abstract_declarator_5(t): - 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' - pass - -# Optional fields in abstract declarators - -def p_constant_expression_opt_1(t): - 'constant_expression_opt : empty' - pass - -def p_constant_expression_opt_2(t): - 'constant_expression_opt : constant_expression' - pass - -def p_parameter_type_list_opt_1(t): - 'parameter_type_list_opt : empty' - pass - -def p_parameter_type_list_opt_2(t): - 'parameter_type_list_opt : parameter_type_list' - pass - -# statement: - -def p_statement(t): - ''' - statement : labeled_statement - | expression_statement - | compound_statement - | selection_statement - | iteration_statement - | jump_statement - ''' - pass - -# labeled-statement: - -def p_labeled_statement_1(t): - 'labeled_statement : ID COLON statement' - pass - -def p_labeled_statement_2(t): - 'labeled_statement : CASE constant_expression COLON statement' - pass - -def p_labeled_statement_3(t): - 'labeled_statement : DEFAULT COLON statement' - pass - -# expression-statement: -def p_expression_statement(t): - 'expression_statement : expression_opt SEMI' - pass - -# compound-statement: - -def p_compound_statement_1(t): - 'compound_statement : LBRACE declaration_list statement_list RBRACE' - pass - -def p_compound_statement_2(t): - 'compound_statement : LBRACE statement_list RBRACE' - pass - -def p_compound_statement_3(t): - 'compound_statement : LBRACE declaration_list RBRACE' - pass - -def 
p_compound_statement_4(t): - 'compound_statement : LBRACE RBRACE' - pass - -# statement-list: - -def p_statement_list_1(t): - 'statement_list : statement' - pass - -def p_statement_list_2(t): - 'statement_list : statement_list statement' - pass - -# selection-statement - -def p_selection_statement_1(t): - 'selection_statement : IF LPAREN expression RPAREN statement' - pass - -def p_selection_statement_2(t): - 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' - pass - -def p_selection_statement_3(t): - 'selection_statement : SWITCH LPAREN expression RPAREN statement ' - pass - -# iteration_statement: - -def p_iteration_statement_1(t): - 'iteration_statement : WHILE LPAREN expression RPAREN statement' - pass - -def p_iteration_statement_2(t): - 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' - pass - -def p_iteration_statement_3(t): - 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' - pass - -# jump_statement: - -def p_jump_statement_1(t): - 'jump_statement : GOTO ID SEMI' - pass - -def p_jump_statement_2(t): - 'jump_statement : CONTINUE SEMI' - pass - -def p_jump_statement_3(t): - 'jump_statement : BREAK SEMI' - pass - -def p_jump_statement_4(t): - 'jump_statement : RETURN expression_opt SEMI' - pass - -def p_expression_opt_1(t): - 'expression_opt : empty' - pass - -def p_expression_opt_2(t): - 'expression_opt : expression' - pass - -# expression: -def p_expression_1(t): - 'expression : assignment_expression' - pass - -def p_expression_2(t): - 'expression : expression COMMA assignment_expression' - pass - -# assigment_expression: -def p_assignment_expression_1(t): - 'assignment_expression : conditional_expression' - pass - -def p_assignment_expression_2(t): - 'assignment_expression : unary_expression assignment_operator assignment_expression' - pass - -# assignment_operator: -def p_assignment_operator(t): - ''' - assignment_operator : EQUALS - | TIMESEQUAL - | DIVEQUAL - | MODEQUAL - | PLUSEQUAL - | MINUSEQUAL - | LSHIFTEQUAL - | RSHIFTEQUAL - | ANDEQUAL - | OREQUAL - | XOREQUAL - ''' - pass - -# conditional-expression -def p_conditional_expression_1(t): - 'conditional_expression : logical_or_expression' - pass - -def p_conditional_expression_2(t): - 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' - pass - -# constant-expression - -def p_constant_expression(t): - 'constant_expression : conditional_expression' - pass - -# logical-or-expression - -def p_logical_or_expression_1(t): - 'logical_or_expression : logical_and_expression' - pass - -def p_logical_or_expression_2(t): - 'logical_or_expression : logical_or_expression LOR logical_and_expression' - pass - -# logical-and-expression - -def p_logical_and_expression_1(t): - 'logical_and_expression : inclusive_or_expression' - pass - -def p_logical_and_expression_2(t): - 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' - pass - -# inclusive-or-expression: - -def p_inclusive_or_expression_1(t): - 'inclusive_or_expression : exclusive_or_expression' - pass - -def p_inclusive_or_expression_2(t): - 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' - pass - -# exclusive-or-expression: - -def p_exclusive_or_expression_1(t): - 'exclusive_or_expression : and_expression' - pass - -def p_exclusive_or_expression_2(t): - 'exclusive_or_expression : exclusive_or_expression XOR and_expression' - pass - -# AND-expression - -def 
p_and_expression_1(t): - 'and_expression : equality_expression' - pass - -def p_and_expression_2(t): - 'and_expression : and_expression AND equality_expression' - pass - - -# equality-expression: -def p_equality_expression_1(t): - 'equality_expression : relational_expression' - pass - -def p_equality_expression_2(t): - 'equality_expression : equality_expression EQ relational_expression' - pass - -def p_equality_expression_3(t): - 'equality_expression : equality_expression NE relational_expression' - pass - - -# relational-expression: -def p_relational_expression_1(t): - 'relational_expression : shift_expression' - pass - -def p_relational_expression_2(t): - 'relational_expression : relational_expression LT shift_expression' - pass - -def p_relational_expression_3(t): - 'relational_expression : relational_expression GT shift_expression' - pass - -def p_relational_expression_4(t): - 'relational_expression : relational_expression LE shift_expression' - pass - -def p_relational_expression_5(t): - 'relational_expression : relational_expression GE shift_expression' - pass - -# shift-expression - -def p_shift_expression_1(t): - 'shift_expression : additive_expression' - pass - -def p_shift_expression_2(t): - 'shift_expression : shift_expression LSHIFT additive_expression' - pass - -def p_shift_expression_3(t): - 'shift_expression : shift_expression RSHIFT additive_expression' - pass - -# additive-expression - -def p_additive_expression_1(t): - 'additive_expression : multiplicative_expression' - pass - -def p_additive_expression_2(t): - 'additive_expression : additive_expression PLUS multiplicative_expression' - pass - -def p_additive_expression_3(t): - 'additive_expression : additive_expression MINUS multiplicative_expression' - pass - -# multiplicative-expression - -def p_multiplicative_expression_1(t): - 'multiplicative_expression : cast_expression' - pass - -def p_multiplicative_expression_2(t): - 'multiplicative_expression : multiplicative_expression TIMES cast_expression' - pass - -def p_multiplicative_expression_3(t): - 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' - pass - -def p_multiplicative_expression_4(t): - 'multiplicative_expression : multiplicative_expression MOD cast_expression' - pass - -# cast-expression: - -def p_cast_expression_1(t): - 'cast_expression : unary_expression' - pass - -def p_cast_expression_2(t): - 'cast_expression : LPAREN type_name RPAREN cast_expression' - pass - -# unary-expression: -def p_unary_expression_1(t): - 'unary_expression : postfix_expression' - pass - -def p_unary_expression_2(t): - 'unary_expression : PLUSPLUS unary_expression' - pass - -def p_unary_expression_3(t): - 'unary_expression : MINUSMINUS unary_expression' - pass - -def p_unary_expression_4(t): - 'unary_expression : unary_operator cast_expression' - pass - -def p_unary_expression_5(t): - 'unary_expression : SIZEOF unary_expression' - pass - -def p_unary_expression_6(t): - 'unary_expression : SIZEOF LPAREN type_name RPAREN' - pass - -#unary-operator -def p_unary_operator(t): - '''unary_operator : AND - | TIMES - | PLUS - | MINUS - | NOT - | LNOT ''' - pass - -# postfix-expression: -def p_postfix_expression_1(t): - 'postfix_expression : primary_expression' - pass - -def p_postfix_expression_2(t): - 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' - pass - -def p_postfix_expression_3(t): - 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' - pass - -def p_postfix_expression_4(t): - 'postfix_expression : 
postfix_expression LPAREN RPAREN' - pass - -def p_postfix_expression_5(t): - 'postfix_expression : postfix_expression PERIOD ID' - pass - -def p_postfix_expression_6(t): - 'postfix_expression : postfix_expression ARROW ID' - pass - -def p_postfix_expression_7(t): - 'postfix_expression : postfix_expression PLUSPLUS' - pass - -def p_postfix_expression_8(t): - 'postfix_expression : postfix_expression MINUSMINUS' - pass - -# primary-expression: -def p_primary_expression(t): - '''primary_expression : ID - | constant - | SCONST - | LPAREN expression RPAREN''' - pass - -# argument-expression-list: -def p_argument_expression_list(t): - '''argument_expression_list : assignment_expression - | argument_expression_list COMMA assignment_expression''' - pass - -# constant: -def p_constant(t): - '''constant : ICONST - | FCONST - | CCONST''' - pass - - -def p_empty(t): - 'empty : ' - pass - -def p_error(t): - print("Whoa. We're hosed") - -import profile -# Build the grammar - -yacc.yacc(method='LALR') - -#profile.run("yacc.yacc(method='LALR')") - - - - diff --git a/ply/example/calc/calc.py b/ply/example/calc/calc.py deleted file mode 100644 index b923780..0000000 --- a/ply/example/calc/calc.py +++ /dev/null @@ -1,107 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - ) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME "=" expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - -def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - -def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - -def p_expression_name(p): - "expression : NAME" - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) diff --git a/ply/example/calcdebug/calc.py b/ply/example/calcdebug/calc.py deleted file mode 100644 index 6732f9f..0000000 --- a/ply/example/calcdebug/calc.py +++ /dev/null @@ -1,113 +0,0 @@ -# ----------------------------------------------------------------------------- 
-# calc.py -# -# This example shows how to run the parser in a debugging mode -# with output routed to a logging object. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - ) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME "=" expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - -def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - -def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - -def p_expression_name(p): - "expression : NAME" - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc() - -import logging -logging.basicConfig( - level=logging.INFO, - filename="parselog.txt" -) - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s,debug=logging.getLogger()) diff --git a/ply/example/classcalc/calc.py b/ply/example/classcalc/calc.py deleted file mode 100755 index bf0d065..0000000 --- a/ply/example/classcalc/calc.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. 
-# -# Class-based example contributed to PLY by David McNab -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser: - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print(p[1]) - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(self, p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/ply/example/classcalc/calc_Calc_parsetab.py b/ply/example/classcalc/calc_Calc_parsetab.py deleted file mode 100644 index 6ec0d30..0000000 --- a/ply/example/classcalc/calc_Calc_parsetab.py +++ /dev/null @@ -1,40 +0,0 @@ - -# calc_Calc_parsetab.py -# This file is automatically generated. Do not edit. 
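The class-based calculator above relies on PLY's module argument: the instance is handed to lex.lex() and yacc.yacc(), so the tokens tuple, the t_* rules, and the p_* methods are discovered on the object rather than in module globals, and the generated LALR tables are cached in the module named by tabmodule (the calc_Calc_parsetab.py file reproduced here is exactly such a cache). A stripped-down sketch of the same wiring, using an illustrative grammar and file name rather than anything taken from the repository:

    import ply.lex as lex
    import ply.yacc as yacc

    class WordListParser(object):
        # Everything PLY needs lives on this instance.
        tokens = ('WORD',)
        t_ignore = ' \t'
        t_WORD = r'[A-Za-z]+'

        def t_error(self, t):
            t.lexer.skip(1)

        def p_list_many(self, p):
            'wordlist : wordlist WORD'
            p[0] = p[1] + [p[2]]

        def p_list_one(self, p):
            'wordlist : WORD'
            p[0] = [p[1]]

        def p_error(self, p):
            print("Syntax error")

        def __init__(self):
            self.lexer = lex.lex(module=self)
            self.parser = yacc.yacc(module=self,
                                    tabmodule='wordlist_parsetab',  # cached table module, analogous to calc_Calc_parsetab.py
                                    debug=False)

        def parse(self, text):
            return self.parser.parse(text, lexer=self.lexer)

    # WordListParser().parse("one two three") would be expected to return ['one', 'two', 'three']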
-_tabversion = '3.2' - -_lr_method = 'LALR' - -_lr_signature = '|\x0f"\xe2\x0e\xf7\x0fT\x15K\x1c\xc0\x1e\xa3c\x10' - -_lr_action_items = {'$end':([1,2,3,5,9,15,16,17,18,19,20,21,22,],[-11,-10,0,-2,-11,-8,-1,-9,-6,-5,-3,-7,-4,]),'RPAREN':([2,8,9,15,17,18,19,20,21,22,],[-10,17,-11,-8,-9,-6,-5,-3,-7,-4,]),'DIVIDE':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,10,10,-11,-8,10,-9,-6,-5,10,-7,10,]),'EQUALS':([1,],[7,]),'NUMBER':([0,4,6,7,10,11,12,13,14,],[2,2,2,2,2,2,2,2,2,]),'PLUS':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,12,12,-11,-8,12,-9,-6,-5,-3,-7,-4,]),'LPAREN':([0,4,6,7,10,11,12,13,14,],[4,4,4,4,4,4,4,4,4,]),'EXP':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,13,13,-11,-8,13,-9,13,13,13,-7,13,]),'TIMES':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,11,11,-11,-8,11,-9,-6,-5,11,-7,11,]),'MINUS':([0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,],[6,-11,-10,6,14,6,6,14,-11,6,6,6,6,6,-8,14,-9,-6,-5,-3,-7,-4,]),'NAME':([0,4,6,7,10,11,12,13,14,],[1,9,9,9,9,9,9,9,9,]),} - -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'expression':([0,4,6,7,10,11,12,13,14,],[5,8,15,16,18,19,20,21,22,]),'statement':([0,],[3,]),} - -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> statement","S'",1,None,None,None), - ('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','./calc.py',107), - ('statement -> expression','statement',1,'p_statement_expr','./calc.py',111), - ('expression -> expression PLUS expression','expression',3,'p_expression_binop','./calc.py',116), - ('expression -> expression MINUS expression','expression',3,'p_expression_binop','./calc.py',117), - ('expression -> expression TIMES expression','expression',3,'p_expression_binop','./calc.py',118), - ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','./calc.py',119), - ('expression -> expression EXP expression','expression',3,'p_expression_binop','./calc.py',120), - ('expression -> MINUS expression','expression',2,'p_expression_uminus','./calc.py',130), - ('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','./calc.py',134), - ('expression -> NUMBER','expression',1,'p_expression_number','./calc.py',138), - ('expression -> NAME','expression',1,'p_expression_name','./calc.py',142), -] diff --git a/ply/example/cleanup.sh b/ply/example/cleanup.sh deleted file mode 100755 index 3e115f4..0000000 --- a/ply/example/cleanup.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class diff --git a/ply/example/closurecalc/calc.py b/ply/example/closurecalc/calc.py deleted file mode 100644 index 6598f58..0000000 --- a/ply/example/closurecalc/calc.py +++ /dev/null @@ -1,130 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A calculator parser that makes use of closures. The function make_calculator() -# returns a function that accepts an input string and returns a result. All -# lexing rules, parsing rules, and internal state are held inside the function. 
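That design is the ordinary closure pattern: all mutable state is created inside one call of the factory and only the callable escapes, so several independent calculators can coexist without sharing any globals. A tiny PLY-free sketch of the idea, with made-up names:

    def make_accumulator():
        state = {'total': 0}          # private to this particular closure

        def add(amount):
            state['total'] += amount
            return state['total']

        return add                    # callers only ever see the inner function

    acc_a = make_accumulator()
    acc_b = make_accumulator()
    acc_a(5); acc_a(3)                # acc_a is now at 8
    acc_b(1)                          # acc_b is independently at 1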
-# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -# Make a calculator function - -def make_calculator(): - import ply.lex as lex - import ply.yacc as yacc - - # ------- Internal calculator state - - variables = { } # Dictionary of stored variables - - # ------- Calculator tokenizing rules - - tokens = ( - 'NAME','NUMBER', - ) - - literals = ['=','+','-','*','/', '(',')'] - - t_ignore = " \t" - - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - - def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Build the lexer - lexer = lex.lex() - - # ------- Calculator parsing rules - - precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - - def p_statement_assign(p): - 'statement : NAME "=" expression' - variables[p[1]] = p[3] - p[0] = None - - def p_statement_expr(p): - 'statement : expression' - p[0] = p[1] - - def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - - def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - - def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - - def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - - def p_expression_name(p): - "expression : NAME" - try: - p[0] = variables[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - - - # Build the parser - parser = yacc.yacc() - - # ------- Input function - - def input(text): - result = parser.parse(text,lexer=lexer) - return result - - return input - -# Make a calculator object and use it -calc = make_calculator() - -while True: - try: - s = raw_input("calc > ") - except EOFError: - break - r = calc(s) - if r: - print(r) - - diff --git a/ply/example/hedit/hedit.py b/ply/example/hedit/hedit.py deleted file mode 100644 index 2e80675..0000000 --- a/ply/example/hedit/hedit.py +++ /dev/null @@ -1,48 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters. 
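For instance, 6HFORMAT encodes the six characters FORMAT, so a line such as 5Hhello 3Habc fed through the rule below should come out as two H_EDIT_DESCRIPTOR tokens whose values are "hello" and "abc" (an illustration of the intended behaviour, not captured output).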
-# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() -lex.runmain() - - diff --git a/ply/example/newclasscalc/calc.py b/ply/example/newclasscalc/calc.py deleted file mode 100755 index a12e498..0000000 --- a/ply/example/newclasscalc/calc.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# Class-based example contributed to PLY by David McNab. -# -# Modified to use new-style classes. Test case. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser(object): - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print(p[1]) - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in 
range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - - def p_error(self, p): - if p: - print("Syntax error at '%s'" % p.value) - else: - print("Syntax error at EOF") - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/ply/example/optcalc/README b/ply/example/optcalc/README deleted file mode 100644 index 53dd5fc..0000000 --- a/ply/example/optcalc/README +++ /dev/null @@ -1,9 +0,0 @@ -An example showing how to use Python optimized mode. -To run: - - - First run 'python calc.py' - - - Then run 'python -OO calc.py' - -If working correctly, the second version should run the -same way. diff --git a/ply/example/optcalc/calc.py b/ply/example/optcalc/calc.py deleted file mode 100644 index dd83351..0000000 --- a/ply/example/optcalc/calc.py +++ /dev/null @@ -1,119 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -if sys.version_info[0] >= 3: - raw_input = input - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex(optimize=1) - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - elif t[2] == '<': t[0] = t[1] < t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - if t: - print("Syntax error at '%s'" % 
t.value) - else: - print("Syntax error at EOF") - -import ply.yacc as yacc -yacc.yacc(optimize=1) - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - yacc.parse(s) - diff --git a/ply/example/unicalc/calc.py b/ply/example/unicalc/calc.py deleted file mode 100644 index 55fb48d..0000000 --- a/ply/example/unicalc/calc.py +++ /dev/null @@ -1,117 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# This example uses unicode strings for tokens, docstrings, and input. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = ur'\+' -t_MINUS = ur'-' -t_TIMES = ur'\*' -t_DIVIDE = ur'/' -t_EQUALS = ur'=' -t_LPAREN = ur'\(' -t_RPAREN = ur'\)' -t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - ur'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = u" \t" - -def t_newline(t): - ur'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print p[1] - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == u'+' : p[0] = p[1] + p[3] - elif p[2] == u'-': p[0] = p[1] - p[3] - elif p[2] == u'*': p[0] = p[1] * p[3] - elif p[2] == u'/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - -def p_error(p): - if p: - print "Syntax error at '%s'" % p.value - else: - print "Syntax error at EOF" - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(unicode(s)) diff --git a/ply/example/yply/README b/ply/example/yply/README deleted file mode 100644 index bfadf36..0000000 --- a/ply/example/yply/README +++ /dev/null @@ -1,41 +0,0 @@ -yply.py - -This example implements a program yply.py that converts a UNIX-yacc -specification file into a PLY-compatible program. To use, simply -run it like this: - - % python yply.py [-nocode] inputfile.y >myparser.py - -The output of this program is Python code. In the output, -any C code in the original file is included, but is commented out. -If you use the -nocode option, then all of the C code in the -original file is just discarded. - -To use the resulting grammer with PLY, you'll need to edit the -myparser.py file. Within this file, some stub code is included that -can be used to test the construction of the parsing tables. 
However, -you'll need to do more editing to make a workable parser. - -Disclaimer: This just an example I threw together in an afternoon. -It might have some bugs. However, it worked when I tried it on -a yacc-specified C++ parser containing 442 rules and 855 parsing -states. - -Comments: - -1. This example does not parse specification files meant for lex/flex. - You'll need to specify the tokenizer on your own. - -2. This example shows a number of interesting PLY features including - - - Parsing of literal text delimited by nested parentheses - - Some interaction between the parser and the lexer. - - Use of literals in the grammar specification - - One pass compilation. The program just emits the result, - there is no intermediate parse tree. - -3. This program could probably be cleaned up and enhanced a lot. - It would be great if someone wanted to work on this (hint). - --Dave - diff --git a/ply/example/yply/ylex.py b/ply/example/yply/ylex.py deleted file mode 100644 index 84f2f7a..0000000 --- a/ply/example/yply/ylex.py +++ /dev/null @@ -1,112 +0,0 @@ -# lexer for yacc-grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import sys -sys.path.append("../..") - -from ply import * - -tokens = ( - 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE', - 'ID','QLITERAL','NUMBER', -) - -states = (('code','exclusive'),) - -literals = [ ';', ',', '<', '>', '|',':' ] -t_ignore = ' \t' - -t_TOKEN = r'%token' -t_LEFT = r'%left' -t_RIGHT = r'%right' -t_NONASSOC = r'%nonassoc' -t_PREC = r'%prec' -t_START = r'%start' -t_TYPE = r'%type' -t_UNION = r'%union' -t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' -t_QLITERAL = r'''(?P ['"]).*?(?P=quote)''' -t_NUMBER = r'\d+' - -def t_SECTION(t): - r'%%' - if getattr(t.lexer,"lastsection",0): - t.value = t.lexer.lexdata[t.lexpos+2:] - t.lexer.lexpos = len(t.lexer.lexdata) - else: - t.lexer.lastsection = 0 - return t - -# Comments -def t_ccomment(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - -t_ignore_cppcomment = r'//.*' - -def t_LITERAL(t): - r'%\{(.|\n)*?%\}' - t.lexer.lineno += t.value.count("\n") - return t - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - -def t_code(t): - r'\{' - t.lexer.codestart = t.lexpos - t.lexer.level = 1 - t.lexer.begin('code') - -def t_code_ignore_string(t): - r'\"([^\\\n]|(\\.))*?\"' - -def t_code_ignore_char(t): - r'\'([^\\\n]|(\\.))*?\'' - -def t_code_ignore_comment(t): - r'/\*(.|\n)*?\*/' - -def t_code_ignore_cppcom(t): - r'//.*' - -def t_code_lbrace(t): - r'\{' - t.lexer.level += 1 - -def t_code_rbrace(t): - r'\}' - t.lexer.level -= 1 - if t.lexer.level == 0: - t.type = 'CODE' - t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] - t.lexer.begin('INITIAL') - t.lexer.lineno += t.value.count('\n') - return t - -t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' -t_code_ignore_whitespace = r'\s+' -t_code_ignore = "" - -def t_code_error(t): - raise RuntimeError - -def t_error(t): - print "%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]) - print t.value - t.lexer.skip(1) - -lex.lex() - -if __name__ == '__main__': - lex.runmain() - - - - - - - diff --git a/ply/example/yply/yparse.py b/ply/example/yply/yparse.py deleted file mode 100644 index ab5b884..0000000 --- a/ply/example/yply/yparse.py +++ /dev/null @@ -1,217 +0,0 @@ -# parser for Unix yacc-based grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import ylex -tokens = ylex.tokens - -from ply import * - -tokenlist = [] -preclist = [] - -emit_code = 1 - -def 
p_yacc(p): - '''yacc : defsection rulesection''' - -def p_defsection(p): - '''defsection : definitions SECTION - | SECTION''' - p.lexer.lastsection = 1 - print "tokens = ", repr(tokenlist) - print - print "precedence = ", repr(preclist) - print - print "# -------------- RULES ----------------" - print - -def p_rulesection(p): - '''rulesection : rules SECTION''' - - print "# -------------- RULES END ----------------" - print_code(p[2],0) - -def p_definitions(p): - '''definitions : definitions definition - | definition''' - -def p_definition_literal(p): - '''definition : LITERAL''' - print_code(p[1],0) - -def p_definition_start(p): - '''definition : START ID''' - print "start = '%s'" % p[2] - -def p_definition_token(p): - '''definition : toktype opttype idlist optsemi ''' - for i in p[3]: - if i[0] not in "'\"": - tokenlist.append(i) - if p[1] == '%left': - preclist.append(('left',) + tuple(p[3])) - elif p[1] == '%right': - preclist.append(('right',) + tuple(p[3])) - elif p[1] == '%nonassoc': - preclist.append(('nonassoc',)+ tuple(p[3])) - -def p_toktype(p): - '''toktype : TOKEN - | LEFT - | RIGHT - | NONASSOC''' - p[0] = p[1] - -def p_opttype(p): - '''opttype : '<' ID '>' - | empty''' - -def p_idlist(p): - '''idlist : idlist optcomma tokenid - | tokenid''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - p[1].append(p[3]) - -def p_tokenid(p): - '''tokenid : ID - | ID NUMBER - | QLITERAL - | QLITERAL NUMBER''' - p[0] = p[1] - -def p_optsemi(p): - '''optsemi : ';' - | empty''' - -def p_optcomma(p): - '''optcomma : ',' - | empty''' - -def p_definition_type(p): - '''definition : TYPE '<' ID '>' namelist optsemi''' - # type declarations are ignored - -def p_namelist(p): - '''namelist : namelist optcomma ID - | ID''' - -def p_definition_union(p): - '''definition : UNION CODE optsemi''' - # Union declarations are ignored - -def p_rules(p): - '''rules : rules rule - | rule''' - if len(p) == 2: - rule = p[1] - else: - rule = p[2] - - # Print out a Python equivalent of this rule - - embedded = [ ] # Embedded actions (a mess) - embed_count = 0 - - rulename = rule[0] - rulecount = 1 - for r in rule[1]: - # r contains one of the rule possibilities - print "def p_%s_%d(p):" % (rulename,rulecount) - prod = [] - prodcode = "" - for i in range(len(r)): - item = r[i] - if item[0] == '{': # A code block - if i == len(r) - 1: - prodcode = item - break - else: - # an embedded action - embed_name = "_embed%d_%s" % (embed_count,rulename) - prod.append(embed_name) - embedded.append((embed_name,item)) - embed_count += 1 - else: - prod.append(item) - print " '''%s : %s'''" % (rulename, " ".join(prod)) - # Emit code - print_code(prodcode,4) - print - rulecount += 1 - - for e,code in embedded: - print "def p_%s(p):" % e - print " '''%s : '''" % e - print_code(code,4) - print - -def p_rule(p): - '''rule : ID ':' rulelist ';' ''' - p[0] = (p[1],[p[3]]) - -def p_rule2(p): - '''rule : ID ':' rulelist morerules ';' ''' - p[4].insert(0,p[3]) - p[0] = (p[1],p[4]) - -def p_rule_empty(p): - '''rule : ID ':' ';' ''' - p[0] = (p[1],[[]]) - -def p_rule_empty2(p): - '''rule : ID ':' morerules ';' ''' - - p[3].insert(0,[]) - p[0] = (p[1],p[3]) - -def p_morerules(p): - '''morerules : morerules '|' rulelist - | '|' rulelist - | '|' ''' - - if len(p) == 2: - p[0] = [[]] - elif len(p) == 3: - p[0] = [p[2]] - else: - p[0] = p[1] - p[0].append(p[3]) - -# print "morerules", len(p), p[0] - -def p_rulelist(p): - '''rulelist : rulelist ruleitem - | ruleitem''' - - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - 
p[1].append(p[2]) - -def p_ruleitem(p): - '''ruleitem : ID - | QLITERAL - | CODE - | PREC''' - p[0] = p[1] - -def p_empty(p): - '''empty : ''' - -def p_error(p): - pass - -yacc.yacc(debug=0) - -def print_code(code,indent): - if not emit_code: return - codelines = code.splitlines() - for c in codelines: - print "%s# %s" % (" "*indent,c) - diff --git a/ply/example/yply/yply.py b/ply/example/yply/yply.py deleted file mode 100755 index a439817..0000000 --- a/ply/example/yply/yply.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/local/bin/python -# yply.py -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 -# -# Converts a UNIX-yacc specification file into a PLY-compatible -# specification. To use, simply do this: -# -# % python yply.py [-nocode] inputfile.y >myparser.py -# -# The output of this program is Python code. In the output, -# any C code in the original file is included, but is commented. -# If you use the -nocode option, then all of the C code in the -# original file is discarded. -# -# Disclaimer: This just an example I threw together in an afternoon. -# It might have some bugs. However, it worked when I tried it on -# a yacc-specified C++ parser containing 442 rules and 855 parsing -# states. -# - -import sys -sys.path.insert(0,"../..") - -import ylex -import yparse - -from ply import * - -if len(sys.argv) == 1: - print "usage : yply.py [-nocode] inputfile" - raise SystemExit - -if len(sys.argv) == 3: - if sys.argv[1] == '-nocode': - yparse.emit_code = 0 - else: - print "Unknown option '%s'" % sys.argv[1] - raise SystemExit - filename = sys.argv[2] -else: - filename = sys.argv[1] - -yacc.parse(open(filename).read()) - -print """ -if __name__ == '__main__': - from ply import * - yacc.yacc() -""" - - diff --git a/ply/ply/__init__.py b/ply/ply/__init__.py deleted file mode 100644 index 853a985..0000000 --- a/ply/ply/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__all__ = ['lex','yacc'] diff --git a/ply/ply/cpp.py b/ply/ply/cpp.py deleted file mode 100644 index 5cad682..0000000 --- a/ply/ply/cpp.py +++ /dev/null @@ -1,898 +0,0 @@ -# ----------------------------------------------------------------------------- -# cpp.py -# -# Author: David Beazley (http://www.dabeaz.com) -# Copyright (C) 2007 -# All rights reserved -# -# This module implements an ANSI-C style lexical preprocessor for PLY. -# ----------------------------------------------------------------------------- -from __future__ import generators - -# ----------------------------------------------------------------------------- -# Default preprocessor lexer definitions. These tokens are enough to get -# a basic preprocessor working. Other modules may import these if they want -# ----------------------------------------------------------------------------- - -tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND' -) - -literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" - -# Whitespace -def t_CPP_WS(t): - r'\s+' - t.lexer.lineno += t.value.count("\n") - return t - -t_CPP_POUND = r'\#' -t_CPP_DPOUND = r'\#\#' - -# Identifier -t_CPP_ID = r'[A-Za-z_][\w_]*' - -# Integer literal -def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)' - return t - -t_CPP_INTEGER = CPP_INTEGER - -# Floating literal -t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 
- -# String literal -def t_CPP_STRING(t): - r'\"([^\\\n]|(\\(.|\n)))*?\"' - t.lexer.lineno += t.value.count("\n") - return t - -# Character constant 'c' or L'c' -def t_CPP_CHAR(t): - r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' - t.lexer.lineno += t.value.count("\n") - return t - -# Comment -def t_CPP_COMMENT(t): - r'(/\*(.|\n)*?\*/)|(//.*?\n)' - t.lexer.lineno += t.value.count("\n") - return t - -def t_error(t): - t.type = t.value[0] - t.value = t.value[0] - t.lexer.skip(1) - return t - -import re -import copy -import time -import os.path - -# ----------------------------------------------------------------------------- -# trigraph() -# -# Given an input string, this function replaces all trigraph sequences. -# The following mapping is used: -# -# ??= # -# ??/ \ -# ??' ^ -# ??( [ -# ??) ] -# ??! | -# ??< { -# ??> } -# ??- ~ -# ----------------------------------------------------------------------------- - -_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') -_trigraph_rep = { - '=':'#', - '/':'\\', - "'":'^', - '(':'[', - ')':']', - '!':'|', - '<':'{', - '>':'}', - '-':'~' -} - -def trigraph(input): - return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) - -# ------------------------------------------------------------------ -# Macro object -# -# This object holds information about preprocessor macros -# -# .name - Macro name (string) -# .value - Macro value (a list of tokens) -# .arglist - List of argument names -# .variadic - Boolean indicating whether or not variadic macro -# .vararg - Name of the variadic parameter -# -# When a macro is created, the macro replacement token sequence is -# pre-scanned and used to create patch lists that are later used -# during macro expansion -# ------------------------------------------------------------------ - -class Macro(object): - def __init__(self,name,value,arglist=None,variadic=False): - self.name = name - self.value = value - self.arglist = arglist - self.variadic = variadic - if variadic: - self.vararg = arglist[-1] - self.source = None - -# ------------------------------------------------------------------ -# Preprocessor object -# -# Object representing a preprocessor. Contains macro definitions, -# include directories, and other information -# ------------------------------------------------------------------ - -class Preprocessor(object): - def __init__(self,lexer=None): - if lexer is None: - lexer = lex.lexer - self.lexer = lexer - self.macros = { } - self.path = [] - self.temp_path = [] - - # Probe the lexer for selected tokens - self.lexprobe() - - tm = time.localtime() - self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) - self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) - self.parser = None - - # ----------------------------------------------------------------------------- - # tokenize() - # - # Utility function. 
Given a string of text, tokenize into a list of tokens - # ----------------------------------------------------------------------------- - - def tokenize(self,text): - tokens = [] - self.lexer.input(text) - while True: - tok = self.lexer.token() - if not tok: break - tokens.append(tok) - return tokens - - # --------------------------------------------------------------------- - # error() - # - # Report a preprocessor error/warning of some kind - # ---------------------------------------------------------------------- - - def error(self,file,line,msg): - print("%s:%d %s" % (file,line,msg)) - - # ---------------------------------------------------------------------- - # lexprobe() - # - # This method probes the preprocessor lexer object to discover - # the token types of symbols that are important to the preprocessor. - # If this works right, the preprocessor will simply "work" - # with any suitable lexer regardless of how tokens have been named. - # ---------------------------------------------------------------------- - - def lexprobe(self): - - # Determine the token type for identifiers - self.lexer.input("identifier") - tok = self.lexer.token() - if not tok or tok.value != "identifier": - print("Couldn't determine identifier type") - else: - self.t_ID = tok.type - - # Determine the token type for integers - self.lexer.input("12345") - tok = self.lexer.token() - if not tok or int(tok.value) != 12345: - print("Couldn't determine integer type") - else: - self.t_INTEGER = tok.type - self.t_INTEGER_TYPE = type(tok.value) - - # Determine the token type for strings enclosed in double quotes - self.lexer.input("\"filename\"") - tok = self.lexer.token() - if not tok or tok.value != "\"filename\"": - print("Couldn't determine string type") - else: - self.t_STRING = tok.type - - # Determine the token type for whitespace--if any - self.lexer.input(" ") - tok = self.lexer.token() - if not tok or tok.value != " ": - self.t_SPACE = None - else: - self.t_SPACE = tok.type - - # Determine the token type for newlines - self.lexer.input("\n") - tok = self.lexer.token() - if not tok or tok.value != "\n": - self.t_NEWLINE = None - print("Couldn't determine token for newlines") - else: - self.t_NEWLINE = tok.type - - self.t_WS = (self.t_SPACE, self.t_NEWLINE) - - # Check for other characters used by the preprocessor - chars = [ '<','>','#','##','\\','(',')',',','.'] - for c in chars: - self.lexer.input(c) - tok = self.lexer.token() - if not tok or tok.value != c: - print("Unable to lex '%s' required for preprocessor" % c) - - # ---------------------------------------------------------------------- - # add_path() - # - # Adds a search path to the preprocessor. - # ---------------------------------------------------------------------- - - def add_path(self,path): - self.path.append(path) - - # ---------------------------------------------------------------------- - # group_lines() - # - # Given an input string, this function splits it into lines. Trailing whitespace - # is removed. Any line ending with \ is grouped with the next line. This - # function forms the lowest level of the preprocessor---grouping into text into - # a line-by-line format. 
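Concretely, that grouping stage should turn the two physical lines

    #define MAX(a,b) \
    ((a) > (b) ? (a) : (b))

into a single logical line equivalent to #define MAX(a,b) ((a) > (b) ? (a) : (b)) before any directive handling takes place (an illustration of the behaviour described above, not captured output).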
- # ---------------------------------------------------------------------- - - def group_lines(self,input): - lex = self.lexer.clone() - lines = [x.rstrip() for x in input.splitlines()] - for i in xrange(len(lines)): - j = i+1 - while lines[i].endswith('\\') and (j < len(lines)): - lines[i] = lines[i][:-1]+lines[j] - lines[j] = "" - j += 1 - - input = "\n".join(lines) - lex.input(input) - lex.lineno = 1 - - current_line = [] - while True: - tok = lex.token() - if not tok: - break - current_line.append(tok) - if tok.type in self.t_WS and '\n' in tok.value: - yield current_line - current_line = [] - - if current_line: - yield current_line - - # ---------------------------------------------------------------------- - # tokenstrip() - # - # Remove leading/trailing whitespace tokens from a token list - # ---------------------------------------------------------------------- - - def tokenstrip(self,tokens): - i = 0 - while i < len(tokens) and tokens[i].type in self.t_WS: - i += 1 - del tokens[:i] - i = len(tokens)-1 - while i >= 0 and tokens[i].type in self.t_WS: - i -= 1 - del tokens[i+1:] - return tokens - - - # ---------------------------------------------------------------------- - # collect_args() - # - # Collects comma separated arguments from a list of tokens. The arguments - # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) - # where tokencount is the number of tokens consumed, args is a list of arguments, - # and positions is a list of integers containing the starting index of each - # argument. Each argument is represented by a list of tokens. - # - # When collecting arguments, leading and trailing whitespace is removed - # from each argument. - # - # This function properly handles nested parenthesis and commas---these do not - # define new arguments. - # ---------------------------------------------------------------------- - - def collect_args(self,tokenlist): - args = [] - positions = [] - current_arg = [] - nesting = 1 - tokenlen = len(tokenlist) - - # Search for the opening '('. 
- i = 0 - while (i < tokenlen) and (tokenlist[i].type in self.t_WS): - i += 1 - - if (i < tokenlen) and (tokenlist[i].value == '('): - positions.append(i+1) - else: - self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") - return 0, [], [] - - i += 1 - - while i < tokenlen: - t = tokenlist[i] - if t.value == '(': - current_arg.append(t) - nesting += 1 - elif t.value == ')': - nesting -= 1 - if nesting == 0: - if current_arg: - args.append(self.tokenstrip(current_arg)) - positions.append(i) - return i+1,args,positions - current_arg.append(t) - elif t.value == ',' and nesting == 1: - args.append(self.tokenstrip(current_arg)) - positions.append(i+1) - current_arg = [] - else: - current_arg.append(t) - i += 1 - - # Missing end argument - self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") - return 0, [],[] - - # ---------------------------------------------------------------------- - # macro_prescan() - # - # Examine the macro value (token sequence) and identify patch points - # This is used to speed up macro expansion later on---we'll know - # right away where to apply patches to the value to form the expansion - # ---------------------------------------------------------------------- - - def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments - macro.str_patch = [] # String conversion expansion - macro.var_comma_patch = [] # Variadic macro comma patch - i = 0 - while i < len(macro.value): - if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: - argnum = macro.arglist.index(macro.value[i].value) - # Conversion of argument to a string - if i > 0 and macro.value[i-1].value == '#': - macro.value[i] = copy.copy(macro.value[i]) - macro.value[i].type = self.t_STRING - del macro.value[i-1] - macro.str_patch.append((argnum,i-1)) - continue - # Concatenation - elif (i > 0 and macro.value[i-1].value == '##'): - macro.patch.append(('c',argnum,i-1)) - del macro.value[i-1] - continue - elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): - macro.patch.append(('c',argnum,i)) - i += 1 - continue - # Standard expansion - else: - macro.patch.append(('e',argnum,i)) - elif macro.value[i].value == '##': - if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ - ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ - (macro.value[i+1].value == macro.vararg): - macro.var_comma_patch.append(i-1) - i += 1 - macro.patch.sort(key=lambda x: x[2],reverse=True) - - # ---------------------------------------------------------------------- - # macro_expand_args() - # - # Given a Macro and list of arguments (each a token list), this method - # returns an expanded version of a macro. The return value is a token sequence - # representing the replacement macro tokens - # ---------------------------------------------------------------------- - - def macro_expand_args(self,macro,args): - # Make a copy of the macro token sequence - rep = [copy.copy(_x) for _x in macro.value] - - # Make string expansion patches. These do not alter the length of the replacement sequence - - str_expansion = {} - for argnum, i in macro.str_patch: - if argnum not in str_expansion: - str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") - rep[i] = copy.copy(rep[i]) - rep[i].value = str_expansion[argnum] - - # Make the variadic macro comma patch. 
If the variadic macro argument is empty, we get rid - comma_patch = False - if macro.variadic and not args[-1]: - for i in macro.var_comma_patch: - rep[i] = None - comma_patch = True - - # Make all other patches. The order of these matters. It is assumed that the patch list - # has been sorted in reverse order of patch location since replacements will cause the - # size of the replacement sequence to expand from the patch point. - - expanded = { } - for ptype, argnum, i in macro.patch: - # Concatenation. Argument is left unexpanded - if ptype == 'c': - rep[i:i+1] = args[argnum] - # Normal expansion. Argument is macro expanded first - elif ptype == 'e': - if argnum not in expanded: - expanded[argnum] = self.expand_macros(args[argnum]) - rep[i:i+1] = expanded[argnum] - - # Get rid of removed comma if necessary - if comma_patch: - rep = [_i for _i in rep if _i] - - return rep - - - # ---------------------------------------------------------------------- - # expand_macros() - # - # Given a list of tokens, this function performs macro expansion. - # The expanded argument is a dictionary that contains macros already - # expanded. This is used to prevent infinite recursion. - # ---------------------------------------------------------------------- - - def expand_macros(self,tokens,expanded=None): - if expanded is None: - expanded = {} - i = 0 - while i < len(tokens): - t = tokens[i] - if t.type == self.t_ID: - if t.value in self.macros and t.value not in expanded: - # Yes, we found a macro match - expanded[t.value] = True - - m = self.macros[t.value] - if not m.arglist: - # A simple macro - ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) - for e in ex: - e.lineno = t.lineno - tokens[i:i+1] = ex - i += len(ex) - else: - # A macro with arguments - j = i + 1 - while j < len(tokens) and tokens[j].type in self.t_WS: - j += 1 - if tokens[j].value == '(': - tokcount,args,positions = self.collect_args(tokens[j:]) - if not m.variadic and len(args) != len(m.arglist): - self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) - i = j + tokcount - elif m.variadic and len(args) < len(m.arglist)-1: - if len(m.arglist) > 2: - self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) - else: - self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) - i = j + tokcount - else: - if m.variadic: - if len(args) == len(m.arglist)-1: - args.append([]) - else: - args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] - del args[len(m.arglist):] - - # Get macro replacement text - rep = self.macro_expand_args(m,args) - rep = self.expand_macros(rep,expanded) - for r in rep: - r.lineno = t.lineno - tokens[i:j+tokcount] = rep - i += len(rep) - del expanded[t.value] - continue - elif t.value == '__LINE__': - t.type = self.t_INTEGER - t.value = self.t_INTEGER_TYPE(t.lineno) - - i += 1 - return tokens - - # ---------------------------------------------------------------------- - # evalexpr() - # - # Evaluate an expression token sequence for the purposes of evaluating - # integral expressions. 
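As a rough illustration of what that evaluation amounts to: given #if defined(FOO) || BAR with neither FOO nor BAR defined, the defined(FOO) test is rewritten to 0L, the leftover identifier BAR also becomes 0L, || is translated to the Python operator or, and the resulting string 0L or 0L is handed to eval(), so the conditional block is skipped; defining FOO first would turn the expression into 1L or 0L and the block would be kept (an illustration of the intent, not captured output).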
- # ---------------------------------------------------------------------- - - def evalexpr(self,tokens): - # tokens = tokenize(line) - # Search for defined macros - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - result = "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.error(self.source,tokens[i].lineno,"Malformed defined()") - j += 1 - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - tokens = self.expand_macros(tokens) - for i,t in enumerate(tokens): - if t.type == self.t_ID: - tokens[i] = copy.copy(t) - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE("0L") - elif t.type == self.t_INTEGER: - tokens[i] = copy.copy(t) - # Strip off any trailing suffixes - tokens[i].value = str(tokens[i].value) - while tokens[i].value[-1] not in "0123456789abcdefABCDEF": - tokens[i].value = tokens[i].value[:-1] - - expr = "".join([str(x.value) for x in tokens]) - expr = expr.replace("&&"," and ") - expr = expr.replace("||"," or ") - expr = expr.replace("!"," not ") - try: - result = eval(expr) - except StandardError: - self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") - result = 0 - return result - - # ---------------------------------------------------------------------- - # parsegen() - # - # Parse an input string/ - # ---------------------------------------------------------------------- - def parsegen(self,input,source=None): - - # Replace trigraph sequences - t = trigraph(input) - lines = self.group_lines(t) - - if not source: - source = "" - - self.define("__FILE__ \"%s\"" % source) - - self.source = source - chunk = [] - enable = True - iftrigger = False - ifstack = [] - - for x in lines: - for i,tok in enumerate(x): - if tok.type not in self.t_WS: break - if tok.value == '#': - # Preprocessor directive - - for tok in x: - if tok in self.t_WS and '\n' in tok.value: - chunk.append(tok) - - dirtokens = self.tokenstrip(x[i+1:]) - if dirtokens: - name = dirtokens[0].value - args = self.tokenstrip(dirtokens[1:]) - else: - name = "" - args = [] - - if name == 'define': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.define(args) - elif name == 'include': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - oldfile = self.macros['__FILE__'] - for tok in self.include(args): - yield tok - self.macros['__FILE__'] = oldfile - self.source = source - elif name == 'undef': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.undef(args) - elif name == 'ifdef': - ifstack.append((enable,iftrigger)) - if enable: - if not args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'ifndef': - ifstack.append((enable,iftrigger)) - if enable: - if args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'if': - ifstack.append((enable,iftrigger)) - if enable: - result = self.evalexpr(args) - if not result: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'elif': - if ifstack: - if ifstack[-1][0]: # We only pay attention if outer "if" 
allows this - if enable: # If already true, we flip enable False - enable = False - elif not iftrigger: # If False, but not triggered yet, we'll check expression - result = self.evalexpr(args) - if result: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - - elif name == 'else': - if ifstack: - if ifstack[-1][0]: - if enable: - enable = False - elif not iftrigger: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #else") - - elif name == 'endif': - if ifstack: - enable,iftrigger = ifstack.pop() - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") - else: - # Unknown preprocessor directive - pass - - else: - # Normal text - if enable: - chunk.extend(x) - - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - - # ---------------------------------------------------------------------- - # include() - # - # Implementation of file-inclusion - # ---------------------------------------------------------------------- - - def include(self,tokens): - # Try to extract the filename and then process an include file - if not tokens: - return - if tokens: - if tokens[0].value != '<' and tokens[0].type != self.t_STRING: - tokens = self.expand_macros(tokens) - - if tokens[0].value == '<': - # Include <...> - i = 1 - while i < len(tokens): - if tokens[i].value == '>': - break - i += 1 - else: - print("Malformed #include <...>") - return - filename = "".join([x.value for x in tokens[1:i]]) - path = self.path + [""] + self.temp_path - elif tokens[0].type == self.t_STRING: - filename = tokens[0].value[1:-1] - path = self.temp_path + [""] + self.path - else: - print("Malformed #include statement") - return - for p in path: - iname = os.path.join(p,filename) - try: - data = open(iname,"r").read() - dname = os.path.dirname(iname) - if dname: - self.temp_path.insert(0,dname) - for tok in self.parsegen(data,filename): - yield tok - if dname: - del self.temp_path[0] - break - except IOError: - pass - else: - print("Couldn't find '%s'" % filename) - - # ---------------------------------------------------------------------- - # define() - # - # Define a new macro - # ---------------------------------------------------------------------- - - def define(self,tokens): - if isinstance(tokens,(str,unicode)): - tokens = self.tokenize(tokens) - - linetok = tokens - try: - name = linetok[0] - if len(linetok) > 1: - mtype = linetok[1] - else: - mtype = None - if not mtype: - m = Macro(name.value,[]) - self.macros[name.value] = m - elif mtype.type in self.t_WS: - # A normal macro - m = Macro(name.value,self.tokenstrip(linetok[2:])) - self.macros[name.value] = m - elif mtype.value == '(': - # A macro with arguments - tokcount, args, positions = self.collect_args(linetok[1:]) - variadic = False - for a in args: - if variadic: - print("No more arguments may follow a variadic argument") - break - astr = "".join([str(_i.value) for _i in a]) - if astr == "...": - variadic = True - a[0].type = self.t_ID - a[0].value = '__VA_ARGS__' - variadic = True - del a[1:] - continue - elif astr[-3:] == "..." and a[0].type == self.t_ID: - variadic = True - del a[1:] - # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes - # of macro expansion - if a[0].value[-3:] == '...': - a[0].value = a[0].value[:-3] - continue - if len(a) > 1 or a[0].type != self.t_ID: - print("Invalid macro argument") - break - else: - mvalue = self.tokenstrip(linetok[1+tokcount:]) - i = 0 - while i < len(mvalue): - if i+1 < len(mvalue): - if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': - del mvalue[i] - continue - elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: - del mvalue[i+1] - i += 1 - m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) - self.macro_prescan(m) - self.macros[name.value] = m - else: - print("Bad macro definition") - except LookupError: - print("Bad macro definition") - - # ---------------------------------------------------------------------- - # undef() - # - # Undefine a macro - # ---------------------------------------------------------------------- - - def undef(self,tokens): - id = tokens[0].value - try: - del self.macros[id] - except LookupError: - pass - - # ---------------------------------------------------------------------- - # parse() - # - # Parse input text. - # ---------------------------------------------------------------------- - def parse(self,input,source=None,ignore={}): - self.ignore = ignore - self.parser = self.parsegen(input,source) - - # ---------------------------------------------------------------------- - # token() - # - # Method to return individual tokens - # ---------------------------------------------------------------------- - def token(self): - try: - while True: - tok = next(self.parser) - if tok.type not in self.ignore: return tok - except StopIteration: - self.parser = None - return None - -if __name__ == '__main__': - import ply.lex as lex - lexer = lex.lex() - - # Run a preprocessor - import sys - f = open(sys.argv[1]) - input = f.read() - - p = Preprocessor(lexer) - p.parse(input,sys.argv[1]) - while True: - tok = p.token() - if not tok: break - print(p.source, tok) - - - - - - - - - - - diff --git a/ply/ply/ctokens.py b/ply/ply/ctokens.py deleted file mode 100644 index dd5f102..0000000 --- a/ply/ply/ctokens.py +++ /dev/null @@ -1,133 +0,0 @@ -# ---------------------------------------------------------------------- -# ctokens.py -# -# Token specifications for symbols in ANSI C and C++. This file is -# meant to be used as a library in other tokenizers. -# ---------------------------------------------------------------------- - -# Reserved words - -tokens = [ - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', - - # Structure dereference (->) - 'ARROW', - - # Ternary operator (?) - 'TERNARY', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) 
- 'ELLIPSIS', -] - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MODULO = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' - -# Increment/decrement -t_INCREMENT = r'\+\+' -t_DECREMENT = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_TERNARY = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers -t_ID = r'[A-Za-z_][A-Za-z0-9_]*' - -# Integer literal -t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_STRING = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comment (C-Style) -def t_COMMENT(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - return t - -# Comment (C++-Style) -def t_CPPCOMMENT(t): - r'//.*\n' - t.lexer.lineno += 1 - return t - - - - - - diff --git a/ply/ply/lex.py b/ply/ply/lex.py deleted file mode 100644 index bd32da9..0000000 --- a/ply/ply/lex.py +++ /dev/null @@ -1,1058 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: lex.py -# -# Copyright (C) 2001-2011, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
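Looping back to ctokens.py above: since it is explicitly meant to be imported by other tokenizers, a consuming lexer would pull its definitions into module scope and add only what is missing, such as an ignore set and an error rule. A rough sketch of that reuse, assuming the t_* names have first been reconciled with the tokens list (in the copy above they do not fully agree, for example t_MODULO versus the listed MOD token):

    import ply.lex as lex
    from ply.ctokens import *      # brings in tokens plus the shared t_* rules

    t_ignore = ' \t'

    def t_newline(t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    def t_error(t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    clexer = lex.lex()             # assembles a C-style tokenizer from the imported rules
    # clexer.input("x += 42; /* note */")
    # for tok in iter(clexer.token, None):
    #     print(tok)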
-# ----------------------------------------------------------------------------- - -__version__ = "3.4" -__tabversion__ = "3.2" # Version of table file used - -import re, sys, types, copy, os - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# This regular expression is used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Exception thrown when invalid token encountered and no default error -# handler is defined. - -class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s - -# Token class. This class is used to represent the tokens produced. -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) - def __repr__(self): - return str(self) - -# This object is a stand-in for a logging object created by the -# logging module. - -class PlyLogger(object): - def __init__(self,f): - self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") - - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - info = critical - debug = critical - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode - - def clone(self,object=None): - c = copy.copy(self) - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = { } - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = { } - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) - c.lexmodule = object - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): - lextab = tabfile - else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] - - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self,s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self,state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # 
------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self,n): - self.lexpos += n - - # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func,tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if not newtok.type in self.lextokens: - raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: continue - return newtok - - self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError("No input string given with input()") - return None - - # Iterator interface - def __iter__(self): - return self - - def next(self): - t = self.token() - if t is None: - raise StopIteration - return t - - __next__ = next - -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - -def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): - result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. 
-# ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex,re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) - lexindexnames = lexindexfunc[:] - - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) - else: - lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] - except Exception: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- - -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names) - - tokenname = "_".join(parts[i:]) - return (states,tokenname) - - -# ----------------------------------------------------------------------------- -# LexerReflect() -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. 
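For orientation, the reflection machinery above (get_caller_module_dict, _form_master_re, _statetoken, and the LexerReflect class that follows) operates on an ordinary PLY lexer specification module. A minimal sketch of such a specification, illustrative only and not part of the deleted file (the token names and sample input are hypothetical):

    import ply.lex as lex

    tokens = ('NUMBER', 'PLUS')        # required list of token names

    t_PLUS   = r'\+'                   # simple tokens are plain regex strings
    t_ignore = ' \t'                   # ignored characters (must be a string)

    def t_NUMBER(t):                   # function rules keep their regex in the docstring
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):                    # error rule: report and skip one character
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()                  # reflection collects the t_* rules defined above
    lexer.input("1 + 2")
    for tok in lexer:
        print(tok.type, tok.value)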
-# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): - self.ldict = ldict - self.error_func = None - self.tokens = [] - self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() - return self.error - - # Get the tokens map - def get_tokens(self): - tokens = self.ldict.get("tokens",None) - if not tokens: - self.log.error("No token list is defined") - self.error = 1 - return - - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the literals specifier - def get_literals(self): - self.literals = self.ldict.get("literals","") - - # Validate literals - def validate_literals(self): - try: - for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue - - except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 - - def get_states(self): - self.states = self.ldict.get("states",None) - # Build statemap - if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) - - def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] - - # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state - - for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] - - if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 - return - - for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) - self.toknames[f] = tokname - - if hasattr(t,"__call__"): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 - else: - for s in states: - self.funcsym[s].append((f,t)) - elif isinstance(t, StringTypes): - if tokname == 'ignore': - for s in states: - self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) - - elif tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 - else: - for s in states: - self.strsym[s].append((f,t)) - else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 - - # Sort the functions by line number - for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.stateinfo: - # Validate all rules defined by functions - - - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 - - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = func_code(f).co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 - continue - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - continue - - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error 
= 1 - continue - - try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 - - # Validate all rules defined by strings - for name,r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 - continue - - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 - continue - - try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) - if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 - - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 - - # Validate the error function - efunc = self.errorf.get(state,None) - if efunc: - f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = func_code(f).co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 - - if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) - - - # ----------------------------------------------------------------------------- - # validate_file() - # - # This checks to see if there are duplicated t_rulename() functions or strings - # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. - # ----------------------------------------------------------------------------- - - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return # Couldn't find the file. Don't worry about it - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) - if not m: - m = sre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - self.log.error("%s:%d: Rule %s redefined. 
Previously defined on line %d",filename,linen,name,prev) - self.error = 1 - linen += 1 - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): - global lexer - ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} - lexobj = Lexer() - lexobj.lexoptimize = optimize - global token,input - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - if debug: - if debuglog is None: - debuglog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the lexer - if object: module = object - - if module: - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = dict(_items) - else: - ldict = get_caller_module_dict(2) - - # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) - linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Dump some basic debugging information - if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) - - # Build a dictionary of valid token names - lexobj.lextokens = { } - for n in linfo.tokens: - lexobj.lextokens[n] = 1 - - # Get literals specification - if isinstance(linfo.literals,(list,tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals - - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo - - regexs = { } - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) - - # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) - - regexs[state] = regex_list - - # Build the master regular expressions - - if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") - - for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names - if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - 
lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - lexobj.lexreflags = reflags - - # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") - - # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) - if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") - - # Check state information for ignore and error rules - for s,stype in stateinfo.items(): - if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) - elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab,outputdir) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None,data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while 1: - tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ - else: - f.__doc__ = r - return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/ply/ply/yacc.py b/ply/ply/yacc.py deleted file mode 100644 index f70439e..0000000 --- a/ply/ply/yacc.py +++ /dev/null @@ -1,3276 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: yacc.py -# -# Copyright (C) 2001-2011, -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -__version__ = "3.4" -__tabversion__ = "3.2" # Table version - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized - # implementations of certain functions. - -resultlimit = 40 # Size limit of results when running in debug mode. 
- -pickle_protocol = 0 # Protocol to use when writing pickle files - -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize - -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things -# such as the parser.out file. If a user wants more detailed -# information, they can create their own logging object and pass -# it into PLY. - -class PlyLogger(object): - def __init__(self,f): - self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") - - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - critical = debug - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - -# Exception raised for yacc-related errors -class YaccError(Exception): pass - -# Format the result message that the parser produces when running in debug mode. -def format_result(r): - repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) - return result - - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str - else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
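As the comment above describes, each grammar rule receives a YaccProduction object whose indexed elements map onto the .value of the matched symbols. A minimal sketch of rules written against that interface, illustrative only and not part of the deleted file (the grammar, token names, and calclex module are hypothetical; it assumes a lexer along the lines of the sketch shown earlier):

    import ply.yacc as yacc
    from calclex import tokens          # hypothetical module exporting the token list

    def p_expr_plus(p):
        'expr : expr PLUS term'         # the BNF lives in the docstring
        p[0] = p[1] + p[3]              # p[n] reads/writes the .value of symbol n

    def p_expr_term(p):
        'expr : term'
        p[0] = p[1]

    def p_term_number(p):
        'term : NUMBER'
        p[0] = p[1]

    def p_error(tok):
        print("Syntax error at token", tok)

    parser = yacc.yacc()                # builds the parsing tables from the p_* rules above
    print(parser.parse("1 + 2"))        # prints 3 when paired with the lexer sketched earlier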
- -class YaccProduction: - def __init__(self,s,stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - - def __setitem__(self,n,v): - self.slice[n].value = v - - def __getslice__(self,i,j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) - - def set_lineno(self,n,lineno): - self.slice[n].lineno = lineno - - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline - - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) - - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos - - def error(self): - raise SyntaxError - - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - -class LRParser: - def __init__(self,lrtab,errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - - def errok(self): - self.errorok = 1 - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - if debug or yaccdevel: - if isinstance(debug,int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) - elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) - else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). - # - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: - # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! 
DEBUG - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = "$end" - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - # --! DEBUG - debug.debug('') - debug.debug('State : %s', state) - # --! DEBUG - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # --! DEBUG - debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - # --! DEBUG - if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) - else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! 
TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG - return result - - if t == None: - - # --! DEBUG - debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. 
- - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). - # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - -# ----------------------------------------------------------------------------- -# === Grammar Representation === -# -# The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. -# ----------------------------------------------------------------------------- - -import re - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# A grammar rule refers to a specification such as this: -# -# expr : expr PLUS term -# -# Here are the basic attributes defined on all productions -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedence level -# number - Production number. -# func - Function that executes on reduce -# file - File where production function is defined -# lineno - Line number where production function is defined -# -# The following attributes are defined or optional. 
-# -# len - Length of the production (number of symbols on right hand side) -# usyms - Set of unique symbols found in the production -# ----------------------------------------------------------------------------- - -class Production(object): - reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func - self.callable = None - self.file = file - self.line = line - self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production - self.usyms = [ ] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production - self.lr_items = [] - self.lr_next = None - - # Create a string representation - if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) - else: - self.str = "%s ->" % self.name - - def __str__(self): - return self.str - - def __repr__(self): - return "Production("+str(self)+")" - - def __len__(self): - return len(self.prod) - - def __nonzero__(self): - return 1 - - def __getitem__(self,index): - return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - - # Precompute the list of productions immediately following. Hack. Remove later - try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): - p.lr_after = [] - try: - p.lr_before = p.prod[n-1] - except IndexError: - p.lr_before = None - - return p - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - def __str__(self): - return self.str - def __repr__(self): - return "MiniProduction(%s)" % self.str - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - - -# ----------------------------------------------------------------------------- -# class LRItem -# -# This class represents a specific stage of parsing a production rule. For -# example: -# -# expr : expr . PLUS term -# -# In the above, the "." represents the current location of the parse. Here -# basic attributes: -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] -# number - Production number. -# -# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' -# then lr_next refers to 'expr -> expr PLUS . term' -# lr_index - LR item index (location of the ".") in the prod list. 
-# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# lr_after - List of all productions that immediately follow -# lr_before - Grammar symbol immediately before -# ----------------------------------------------------------------------------- - -class LRItem(object): - def __init__(self,p,n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) - else: - s = "%s -> " % self.name - return s - - def __repr__(self): - return "LRItem("+str(self)+")" - -# ----------------------------------------------------------------------------- -# rightmost_terminal() -# -# Return the rightmost terminal from a list of symbols. Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 - return None - -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - -class GrammarError(YaccError): pass - -class Grammar(object): - def __init__(self,terminals): - self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - self.Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. - - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - for term in terminals: - self.Terminals[term] = [] - - self.Terminals['error'] = [] - - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - self.First = { } # A dictionary of precomputed FIRST(x) symbols - - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols - - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. - - self.Start = None # Starting symbol for the grammar - - - def __len__(self): - return len(self.Productions) - - def __getitem__(self,index): - return self.Productions[index] - - # ----------------------------------------------------------------------------- - # set_precedence() - # - # Sets the precedence for a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. 
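The Precedence table described above is filled from the grammar module's `precedence` declaration. A minimal sketch of what that declaration looks like in a PLY grammar file (token names here are illustrative, not taken from this repository); the position in the tuple becomes the numeric level handed to set_precedence():

precedence = (
    ('left',  'PLUS',  'MINUS'),     # level 1 (lowest)
    ('left',  'TIMES', 'DIVIDE'),    # level 2
    ('right', 'UMINUS'),             # level 3, typically selected with %prec
)
# After validation each entry is passed to set_precedence(term, assoc, level),
# giving roughly: Precedence == {'PLUS': ('left', 1), 'TIMES': ('left', 2),
#                                'UMINUS': ('right', 3), ...}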
- # - # ----------------------------------------------------------------------------- - - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" - if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: - raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - - # ----------------------------------------------------------------------------- - # add_production() - # - # Given an action function, this function assembles a production rule and - # computes its precedence level. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # Precedence is determined by the precedence of the right-most non-terminal - # or the precedence of a terminal specified by %prec. - # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - - def add_production(self,prodname,syms,func=None,file='',line=0): - - if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) - if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) - if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) - - # Look for literal tokens - for n,s in enumerate(syms): - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - - # Determine the precedence level - if '%prec' in syms: - if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) - if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) - precname = syms[-1] - prodprec = self.Precedence.get(precname,None) - if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) - else: - self.UsedPrecedence[precname] = 1 - del syms[-2:] # Drop %prec from the rule - else: - # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) - if map in self.Prodmap: - m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) - - # From this point on, everything is valid. 
Create a new Production instance - pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] - - # Add the production number to Terminals and Nonterminals - for t in syms: - if t in self.Terminals: - self.Terminals[t].append(pnumber) - else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] - self.Nonterminals[t].append(pnumber) - - # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) - self.Productions.append(p) - self.Prodmap[map] = p - - # Add to the global productions list - try: - self.Prodnames[prodname].append(p) - except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 - - # ----------------------------------------------------------------------------- - # set_start() - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self,start=None): - if not start: - start = self.Productions[1].name - if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) - self.Nonterminals[start].append(0) - self.Start = start - - # ----------------------------------------------------------------------------- - # find_unreachable() - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. - # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. - return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): - for r in p.prod: - mark_reachable_from(r) - - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 - - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - - # ----------------------------------------------------------------------------- - # infinite_cycles() - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). - # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # Terminals: - for t in self.Terminals: - terminates[t] = 1 - - terminates['$end'] = 1 - - # Nonterminals: - - # Initialize to false: - for n in self.Nonterminals: - terminates[n] = 0 - - # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = 0 - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = 1 - - if p_terminates: - # symbol n terminates! - if not terminates[n]: - terminates[n] = 1 - some_change = 1 - # Don't need to consider any more productions for this n. 
- break - - if not some_change: - break - - infinite = [] - for (s,term) in terminates.items(): - if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - infinite.append(s) - - return infinite - - - # ----------------------------------------------------------------------------- - # undefined_symbols() - # - # Find all symbols that were used the grammar, but not defined as tokens or - # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. - # ----------------------------------------------------------------------------- - def undefined_symbols(self): - result = [] - for p in self.Productions: - if not p: continue - - for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) - return result - - # ----------------------------------------------------------------------------- - # unused_terminals() - # - # Find all terminals that were defined, but not used by the grammar. Returns - # a list of all symbols. - # ----------------------------------------------------------------------------- - def unused_terminals(self): - unused_tok = [] - for s,v in self.Terminals.items(): - if s != 'error' and not v: - unused_tok.append(s) - - return unused_tok - - # ------------------------------------------------------------------------------ - # unused_rules() - # - # Find all grammar rules that were defined, but not used (maybe not reachable) - # Returns a list of productions. - # ------------------------------------------------------------------------------ - - def unused_rules(self): - unused_prod = [] - for s,v in self.Nonterminals.items(): - if not v: - p = self.Prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # unused_precedence() - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. - # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.Precedence: - if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - - return unused - - # ------------------------------------------------------------------------- - # _first() - # - # Compute the value of FIRST1(beta) where beta is a tuple of symbols. - # - # During execution of compute_first1, the result may be incomplete. - # Afterward (e.g., when called from compute_follow()), it will be complete. - # ------------------------------------------------------------------------- - def _first(self,beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [ ] - for x in beta: - x_produces_empty = 0 - - # Add all the non- symbols of First[x] to the result. - for f in self.First[x]: - if f == ' ': - x_produces_empty = 1 - else: - if f not in result: result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. 
- break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append(' ') - - return result - - # ------------------------------------------------------------------------- - # compute_first() - # - # Compute the value of FIRST1(X) for all symbols - # ------------------------------------------------------------------------- - def compute_first(self): - if self.First: - return self.First - - # Terminals: - for t in self.Terminals: - self.First[t] = [t] - - self.First['$end'] = ['$end'] - - # Nonterminals: - - # Initialize to the empty set: - for n in self.Nonterminals: - self.First[n] = [] - - # Then propagate symbols until no change: - while 1: - some_change = 0 - for n in self.Nonterminals: - for p in self.Prodnames[n]: - for f in self._first(p.prod): - if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 - if not some_change: - break - - return self.First - - # --------------------------------------------------------------------- - # compute_follow() - # - # Computes all of the follow sets for every non-terminal symbol. The - # follow set is the set of all symbols that might follow a given - # non-terminal. See the Dragon book, 2nd Ed. p. 189. - # --------------------------------------------------------------------- - def compute_follow(self,start=None): - # If already computed, return the result - if self.Follow: - return self.Follow - - # If first sets not computed yet, do that first. - if not self.First: - self.compute_first() - - # Add '$end' to the follow list of the start symbol - for k in self.Nonterminals: - self.Follow[k] = [ ] - - if not start: - start = self.Productions[1].name - - self.Follow[start] = [ '$end' ] - - while 1: - didadd = 0 - for p in self.Productions[1:]: - # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] - if B in self.Nonterminals: - # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i+1:]) - hasempty = 0 - for f in fst: - if f != ' ' and f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = 1 - if f == ' ': - hasempty = 1 - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = 1 - if not didadd: break - return self.Follow - - - # ----------------------------------------------------------------------------- - # build_lritems() - # - # This function walks the list of productions and builds a complete set of the - # LR items. The LR items are stored in two ways: First, they are uniquely - # numbered and placed in the list _lritems. Second, a linked list of LR items - # is built for each production. For example: - # - # E -> E PLUS E - # - # Creates the list - # - # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] - # ----------------------------------------------------------------------------- - - def build_lritems(self): - for p in self.Productions: - lastlri = p - i = 0 - lr_items = [] - while 1: - if i > len(p): - lri = None - else: - lri = LRItem(p,i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i-1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri - if not lri: break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = lr_items - -# ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. -# ----------------------------------------------------------------------------- - -class VersionError(YaccError): pass - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self,module): - if isinstance(module,types.ModuleType): - parsetab = module - else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] - - if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self,filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - in_f = open(filename,"rb") - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): - for p in self.lr_productions: - p.bind(pdict) - -# ----------------------------------------------------------------------------- -# === LR Generator === -# -# The following classes and functions are used to generate LR parsing tables on -# a grammar. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. 
-# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - -def digraph(X,R,FP): - N = { } - for x in X: - N[x] = 0 - stack = [] - F = { } - for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) - return F - -def traverse(x,N,stack,F,X,R,FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) - if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - -class LALRError(YaccError): pass - -# ----------------------------------------------------------------------------- -# == LRGeneratedTable == -# -# This class implements the LR table generation algorithm. There are no -# public methods except for write() -# ----------------------------------------------------------------------------- - -class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) - - self.grammar = grammar - self.lr_method = method - - # Set up the logger - if not log: - log = NullLogger() - self.log = log - - # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures - - self._add_count = 0 # Internal counter used to detect cycles - - # Diagonistic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts - - self.sr_conflicts = [] - self.rr_conflicts = [] - - # Build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() - self.lr_parse_table() - - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - - def lr0_closure(self,I): - self._add_count += 1 - - # Add everything in I to J - J = I[:] - didadd = 1 - while didadd: - didadd = 0 - for j in J: - for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = 1 - - return J - - # Compute the LR(0) goto function goto(I,X) where I is a set - # of LR(0) items and X is a grammar symbol. This function is written - # in a way that guarantees uniqueness of the generated goto sets - # (i.e. the same goto set will never be returned as two different Python - # objects). With uniqueness, we can later do fast set comparisons using - # id(obj) instead of element-wise comparison. 
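The digraph()/traverse() pair above computes the set-valued fixed point F(x) = F'(x) U U{ F(y) | x R y } with a Tarjan-style traversal so strongly connected components are handled in one pass. A self-contained sketch of the same fixed point, written naively (iterate until nothing changes) purely for illustration; the toy relation below is made up:

def naive_digraph(X, R, FP):
    # F starts as F'(x) and absorbs F(y) for every y related to x.
    F = {x: set(FP(x)) for x in X}
    changed = True
    while changed:
        changed = False
        for x in X:
            for y in R(x):
                if not F[y] <= F[x]:
                    F[x] |= F[y]
                    changed = True
    return F

rel = {'a': ['b'], 'b': ['c'], 'c': []}
F = naive_digraph('abc', lambda x: rel[x], lambda x: {x.upper()})
# F == {'a': {'A', 'B', 'C'}, 'b': {'B', 'C'}, 'c': {'C'}}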
- - def lr0_goto(self,I,x): - # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = self.lr_goto_cache.get(x,None) - if not s: - s = { } - self.lr_goto_cache[x] = s - - gs = [ ] - for p in I: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n),None) - if not s1: - s1 = { } - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end',None) - if not g: - if gs: - g = self.lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g - return g - - # Compute the LR(0) sets of item function - def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] - i = 0 - for I in C: - self.lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue - self.lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - - # ----------------------------------------------------------------------------- - # ==== LALR(1) Parsing ==== - # - # LALR(1) parsing is almost exactly the same as SLR except that instead of - # relying upon Follow() sets when performing reductions, a more selective - # lookahead set that incorporates the state of the LR(0) machine is utilized. - # Thus, we mainly just have to focus on calculating the lookahead sets. - # - # The method used here is due to DeRemer and Pennelo (1982). - # - # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) - # Lookahead Sets", ACM Transactions on Programming Languages and Systems, - # Vol. 4, No. 4, Oct. 1982, pp. 615-649 - # - # Further details can also be found in: - # - # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", - # McGraw-Hill Book Company, (1985). - # - # ----------------------------------------------------------------------------- - - # ----------------------------------------------------------------------------- - # compute_nullable_nonterminals() - # - # Creates a dictionary containing all of the non-terminals that might produce - # an empty production. - # ----------------------------------------------------------------------------- - - def compute_nullable_nonterminals(self): - nullable = {} - num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 - continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) - return nullable - - # ----------------------------------------------------------------------------- - # find_nonterminal_trans(C) - # - # Given a set of LR(0) items, this functions finds all of the non-terminal - # transitions. These are transitions in which a dot appears immediately before - # a non-terminal. Returns a list of tuples of the form (state,N) where state - # is the state number and N is the nonterminal symbol. - # - # The input C is the set of LR(0) items. 
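compute_nullable_nonterminals() above uses the standard iterate-until-stable propagation. A tiny standalone illustration on a made-up grammar fragment (symbol names are hypothetical, not from this project):

# opt_args -> <empty> | arg_list        arg_list -> ARG | arg_list COMMA ARG
productions = [
    ('opt_args', []),                        # empty right-hand side
    ('opt_args', ['arg_list']),
    ('arg_list', ['ARG']),
    ('arg_list', ['arg_list', 'COMMA', 'ARG']),
]
nullable = set()
while True:
    before = len(nullable)
    for name, rhs in productions:
        if all(sym in nullable for sym in rhs):   # an empty rhs passes trivially
            nullable.add(name)
    if len(nullable) == before:
        break
# nullable == {'opt_args'}: arg_list always derives at least one ARG.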
- # ----------------------------------------------------------------------------- - - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans - - # ----------------------------------------------------------------------------- - # dr_relation() - # - # Computes the DR(p,A) relationships for non-terminal transitions. The input - # is a tuple (state,N) where state is a number and N is a nonterminal symbol. - # - # Returns a list of terminals. - # ----------------------------------------------------------------------------- - - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans - terms = [] - - g = self.lr0_goto(C[state],N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') - - return terms - - # ----------------------------------------------------------------------------- - # reads_relation() - # - # Computes the READS() relation (p,A) READS (t,C). - # ----------------------------------------------------------------------------- - - def reads_relation(self,C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) - - return rel - - # ----------------------------------------------------------------------------- - # compute_lookback_includes() - # - # Determines the lookback and includes relations - # - # LOOKBACK: - # - # This relation is determined by running the LR(0) state machine forward. - # For example, starting with a production "N : . A B C", we run it forward - # to obtain "N : A B C ." We then build a relationship between this final - # state and the starting state. These relationships are stored in a dictionary - # lookdict. - # - # INCLUDES: - # - # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). - # - # This relation is used to determine non-terminal transitions that occur - # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # if the following holds: - # - # B -> LAT, where T -> epsilon and p' -L-> p - # - # L is essentially a prefix (which may be empty), T is a suffix that must be - # able to derive an empty string. State p' must lead to state p with the string L. - # - # ----------------------------------------------------------------------------- - - def compute_lookback_includes(self,C,trans,nullable): - - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state,N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . 
on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) - for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb - - return lookdict,includedict - - # ----------------------------------------------------------------------------- - # compute_read_sets() - # - # Given a set of LR(0) items, this function computes the read sets. - # - # Inputs: C = Set of LR(0) items - # ntrans = Set of nonterminal transitions - # nullable = Set of empty transitions - # - # Returns a set containing the read sets - # ----------------------------------------------------------------------------- - - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) - return F - - # ----------------------------------------------------------------------------- - # compute_follow_sets() - # - # Given a set of LR(0) items, a set of non-terminal transitions, a readset, - # and an include set, this function computes the follow sets - # - # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} - # - # Inputs: - # ntrans = Set of nonterminal transitions - # readsets = Readset (previously computed) - # inclsets = Include sets (previously computed) - # - # Returns a set containing the follow sets - # ----------------------------------------------------------------------------- - - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F - - # ----------------------------------------------------------------------------- - # add_lookaheads() - # - # Attaches the lookahead symbols to grammar rules. 
- # - # Inputs: lookbacks - Set of lookback relations - # followset - Computed follow set - # - # This function directly attaches the lookaheads to productions contained - # in the lookbacks set - # ----------------------------------------------------------------------------- - - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): - # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) - - # ----------------------------------------------------------------------------- - # add_lalr_lookaheads() - # - # This function does all of the work of adding lookahead information for use - # with LALR parsing - # ----------------------------------------------------------------------------- - - def add_lalr_lookaheads(self,C): - # Determine all of the nullable nonterminals - nullable = self.compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(C) - - # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) - - # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) - - # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) - - # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) - - # ----------------------------------------------------------------------------- - # lr_parse_table() - # - # This function constructs the parse tables for SLR or LALR - # ----------------------------------------------------------------------------- - def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output - - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) - - # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items - # This determines the number of states - - C = self.lr0_items() - - if self.lr_method == 'LALR': - self.add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") - for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") - - for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp,rejectp = pp,oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = j - st_actionp[a] = p - - # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") - # Print the actions that were not used. (debugging) - not_used = 0 - for a,p,m in actlist: - if a in st_action: - if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! 
%-15s [ %s ]",a,m) - not_used = 1 - _actprint[(a,m)] = 1 - if not_used: - log.debug("") - - # Construct the goto table for this state - - nkeys = { } - for ii in I: - for s in ii.usyms: - if s in self.grammar.Nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) - - action[st] = st_action - actionp[st] = st_actionp - goto[st] = st_goto - st += 1 - - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. -_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in self.lr_productions: - if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) - else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") - f.close() - - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return - - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # ----------------------------------------------------------------------------- - - def pickle_table(self,filename,signature=""): - try: - 
import cPickle as pickle - except ImportError: - import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() - -# ----------------------------------------------------------------------------- -# === INTROSPECTION === -# -# The following functions and classes are used to implement the PLY -# introspection features followed by the yacc() function itself. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - -def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# parse_grammar() -# -# This takes a raw grammar rule string and parses it into production data -# ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): - grammar = [] - # Split the doc string into lines - pstrings = doc.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) - - grammar.append((file,dline,prodname,syms)) - except SyntaxError: - raise - except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) - - return grammar - -# ----------------------------------------------------------------------------- -# ParserReflect() -# -# This class represents information extracted for building a parser including -# start symbol, error function, tokens, precedence list, action functions, -# etc. 
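parse_grammar() above reads production specifications out of the docstrings of p_* action functions, with a leading '|' continuing the previous rule name. A minimal sketch of the docstring format it expects (rule and token names are hypothetical):

def p_expression(p):
    '''expression : expression PLUS term
                  | term'''
    # parse_grammar() reads two rules from this docstring, both named
    # 'expression', with symbol lists ['expression', 'PLUS', 'term'] and
    # ['term'] (the '|' line reuses the last rule name).
    p[0] = p[1] if len(p) == 2 else p[1] + p[3]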
-# ----------------------------------------------------------------------------- -class ParserReflect(object): - def __init__(self,pdict,log=None): - self.pdict = pdict - self.start = None - self.error_func = None - self.tokens = None - self.files = {} - self.grammar = [] - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_start() - self.get_error_func() - self.get_tokens() - self.get_precedence() - self.get_pfunctions() - - # Validate all of the information - def validate_all(self): - self.validate_start() - self.validate_error_func() - self.validate_tokens() - self.validate_precedence() - self.validate_pfunctions() - self.validate_files() - return self.error - - # Compute a signature over the grammar - def signature(self): - try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() - if self.start: - sig.update(self.start.encode('latin-1')) - if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) - if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) - for f in self.pfuncs: - if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): - pass - return sig.digest() - - # ----------------------------------------------------------------------------- - # validate_file() - # - # This method checks to see if there are duplicated p_rulename() functions - # in the parser module file. Without this function, it is really easy for - # users to make mistakes by cutting and pasting code fragments (and it's a real - # bugger to try and figure out why the resulting parser doesn't work). Therefore, - # we just do a little regular expression pattern matching of def statements - # to try and detect duplicates. - # ----------------------------------------------------------------------------- - - def validate_files(self): - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - continue - - counthash = { } - for linen,l in enumerate(lines): - linen += 1 - m = fre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - self.log.warning("%s:%d: Function %s redefined. 
Previously defined on line %d", filename,linen,name,prev) - - # Get the start symbol - def get_start(self): - self.start = self.pdict.get('start') - - # Validate the start symbol - def validate_start(self): - if self.start is not None: - if not isinstance(self.start,str): - self.log.error("'start' must be a string") - - # Look for error handler - def get_error_func(self): - self.error_func = self.pdict.get('p_error') - - # Validate the error function - def validate_error_func(self): - if self.error_func: - if isinstance(self.error_func,types.FunctionType): - ismethod = 0 - elif isinstance(self.error_func, types.MethodType): - ismethod = 1 - else: - self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 - return - - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 - - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 - - # Get the tokens map - def get_tokens(self): - tokens = self.pdict.get("tokens",None) - if not tokens: - self.log.error("No token list is defined") - self.error = 1 - return - - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - # Validate the tokens. - if 'error' in self.tokens: - self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = 1 - return - - terminals = {} - for n in self.tokens: - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the precedence map (if any) - def get_precedence(self): - self.prec = self.pdict.get("precedence",None) - - # Validate and parse the precedence map - def validate_precedence(self): - preclist = [] - if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 - return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 - return - - if len(p) < 2: - self.log.error("Malformed precedence entry %s. 
Must be (assoc, term, ..., term)",p) - self.error = 1 - return - assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 - return - for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 - return - preclist.append((term,assoc,level+1)) - self.preclist = preclist - - # Get all p_functions from the grammar - def get_pfunctions(self): - p_functions = [] - for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() - self.pfuncs = p_functions - - - # Validate all of the p_functions - def validate_pfunctions(self): - grammar = [] - # Check for non-empty symbols - if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: - func = self.pdict[name] - if isinstance(func, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 - elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) - else: - try: - parsed_g = parse_grammar(doc,file,line) - for g in parsed_g: - grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] - self.log.error(str(e)) - self.error = 1 - - # Looks like a valid grammar rule - # Mark the file in which defined. - self.files[file] = 1 - - # Secondary validation step that looks for p_ definitions that are not functions - # or functions that look like they might be grammar rules. 
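# The secondary pass below flags two common slips; hedged, illustrative
# examples (hypothetical names) of what it would warn about:
#
#   p_foo = "statement : expression"     # a p_ name that is not a function
#
#   def statement_expr(p):               # looks like a grammar rule, but lacks the p_ prefix
#       'statement : expression'
#       p[0] = p[1]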
- - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass - - self.grammar = grammar - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build a parser -# ----------------------------------------------------------------------------- - -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): - - global parse # Reference to the parsing method of the last built parser - - # If pickling is enabled, table files are not created - - if picklefile: - write_tables = 0 - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the parser - if module: - _items = [(k,getattr(module,k)) for k in dir(module)] - pdict = dict(_items) - else: - pdict = get_caller_module_dict(2) - - # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) - pinfo.get_all() - - if pinfo.error: - raise YaccError("Unable to build parser") - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - parse = parser.parse - return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() - errorlog.warning(str(e)) - except Exception: - pass - - if debuglog is None: - if debug: - debuglog = PlyLogger(open(debugfile,"w")) - else: - debuglog = NullLogger() - - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) - - - errors = 0 - - # Validate the parser information - if pinfo.validate_all(): - raise YaccError("Unable to build parser") - - if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") - - # Create a grammar object - grammar = Grammar(pinfo.tokens) - - # Set precedence level for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 - - # Set the grammar start symbols - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error(str(e)) - errors = 1 - - if errors: - raise YaccError("Unable to build parser") - - # Verify 
the grammar structure - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 - - unused_terminals = grammar.unused_terminals() - if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") - for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) - - # Print out all productions to the debug log - if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) - - # Find unused non-terminals - unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) - - if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") - if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") - if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) - - if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") - terms = list(grammar.Terminals) - terms.sort() - for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") - nonterms = list(grammar.Nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") - - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 - - if errors: - raise YaccError("Unable to build parser") - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) - - if debug: - num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") - elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") - elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) - - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} - for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: - continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state 
%d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) - warned_never.append(rejected) - - # Write the table file if requested - if write_tables: - lr.write_table(tabmodule,outputdir,signature) - - # Write a pickled version of the tables - if picklefile: - lr.pickle_table(picklefile,signature) - - # Build the parser - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - - parse = parser.parse - return parser diff --git a/ply/setup.py b/ply/setup.py deleted file mode 100755 index 408d5b8..0000000 --- a/ply/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/python -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - -setup(name = "ply", - description="Python Lex & Yacc", - long_description = """ -PLY is yet another implementation of lex and yacc for Python. Some notable -features include the fact that its implemented entirely in Python and it -uses LALR(1) parsing which is efficient and well suited for larger grammars. - -PLY provides most of the standard lex/yacc features including support for empty -productions, precedence rules, error recovery, and support for ambiguous grammars. - -PLY is extremely easy to use and provides very extensive error checking. -It is compatible with both Python 2 and Python 3. -""", - license="""BSD""", - version = "3.4", - author = "David Beazley", - author_email = "dave@dabeaz.com", - maintainer = "David Beazley", - maintainer_email = "dave@dabeaz.com", - url = "http://www.dabeaz.com/ply/", - packages = ['ply'], - classifiers = [ - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 2', - ] - ) diff --git a/ply/test/README b/ply/test/README deleted file mode 100644 index dc74ba3..0000000 --- a/ply/test/README +++ /dev/null @@ -1,7 +0,0 @@ -This directory mostly contains tests for various types of error -conditions. To run: - - $ python testlex.py . - $ python testyacc.py . - -The script 'cleanup.sh' cleans up this directory to its original state. diff --git a/ply/test/calclex.py b/ply/test/calclex.py deleted file mode 100644 index 67d245f..0000000 --- a/ply/test/calclex.py +++ /dev/null @@ -1,49 +0,0 @@ -# ----------------------------------------------------------------------------- -# calclex.py -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex() - - - diff --git a/ply/test/cleanup.sh b/ply/test/cleanup.sh deleted file mode 100755 index 9374f2c..0000000 --- a/ply/test/cleanup.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ - diff --git a/ply/test/lex_closure.py b/ply/test/lex_closure.py deleted file mode 100644 index 30ee679..0000000 --- a/ply/test/lex_closure.py +++ /dev/null @@ -1,54 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_closure.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -def make_calc(): - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Build the lexer - return lex.lex() - -make_calc() -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_doc1.py b/ply/test/lex_doc1.py deleted file mode 100644 index 8a2bfcc..0000000 --- a/ply/test/lex_doc1.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_doc1.py -# -# Missing documentation string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - pass - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_dup1.py b/ply/test/lex_dup1.py deleted file mode 100644 index fd04cdb..0000000 --- a/ply/test/lex_dup1.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_dup1.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_NUMBER = r'\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_dup2.py b/ply/test/lex_dup2.py deleted file mode 100644 index 870e5e7..0000000 --- a/ply/test/lex_dup2.py +++ /dev/null @@ -1,33 +0,0 @@ -# lex_dup2.py -# -# Duplicated rule specifiers - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - r'\d+' - pass - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_dup3.py b/ply/test/lex_dup3.py deleted file mode 100644 index 94b5592..0000000 --- a/ply/test/lex_dup3.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_dup3.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_empty.py b/ply/test/lex_empty.py deleted file mode 100644 index e0368bf..0000000 --- a/ply/test/lex_empty.py +++ /dev/null @@ -1,20 +0,0 @@ -# lex_empty.py -# -# No rules defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - - -lex.lex() - - diff --git a/ply/test/lex_error1.py b/ply/test/lex_error1.py deleted file mode 100644 index 4508a80..0000000 --- a/ply/test/lex_error1.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_error1.py -# -# Missing t_error() rule - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - - - -lex.lex() - - diff --git a/ply/test/lex_error2.py b/ply/test/lex_error2.py deleted file mode 100644 index 8040d39..0000000 --- a/ply/test/lex_error2.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_error2.py -# -# t_error defined, but not function - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_error = "foo" - - - -lex.lex() - - diff --git a/ply/test/lex_error3.py b/ply/test/lex_error3.py deleted file mode 100644 index 1feefb6..0000000 --- a/ply/test/lex_error3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error3.py -# -# t_error defined as function, but with wrong # args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_error4.py b/ply/test/lex_error4.py deleted file mode 100644 index f4f48db..0000000 --- a/ply/test/lex_error4.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error4.py -# -# t_error defined as function, but too many args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t,s): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_hedit.py b/ply/test/lex_hedit.py deleted file mode 100644 index 34f15a1..0000000 --- a/ply/test/lex_hedit.py +++ /dev/null @@ -1,47 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters. 
-# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex() -lex.runmain(data="3Habc 10Habcdefghij 2Hxy") - - - diff --git a/ply/test/lex_ignore.py b/ply/test/lex_ignore.py deleted file mode 100644 index 6c43b4c..0000000 --- a/ply/test/lex_ignore.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_ignore.py -# -# Improperly specific ignore declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_ignore(t): - ' \t' - pass - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/ply/test/lex_ignore2.py b/ply/test/lex_ignore2.py deleted file mode 100644 index f60987a..0000000 --- a/ply/test/lex_ignore2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_ignore2.py -# -# ignore declaration as a raw string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = r' \t' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_literal1.py b/ply/test/lex_literal1.py deleted file mode 100644 index db389c3..0000000 --- a/ply/test/lex_literal1.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal1.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = ["+","-","**"] - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_literal2.py b/ply/test/lex_literal2.py deleted file mode 100644 index b50b92c..0000000 --- a/ply/test/lex_literal2.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal2.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = 23 - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_many_tokens.py b/ply/test/lex_many_tokens.py deleted file mode 100644 index 77ae12b..0000000 --- a/ply/test/lex_many_tokens.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_many_tokens.py -# -# Test lex's ability to handle a large number of tokens (beyond the -# 100-group limit of the re module) - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ["TOK%d" % i for i in range(1000)] - -for tok in tokens: - if sys.version_info[0] < 3: - exec("t_%s = '%s:'" % (tok,tok)) - else: - exec("t_%s = '%s:'" % (tok,tok), globals()) - -t_ignore = " \t" - -def t_error(t): - pass - -lex.lex(optimize=1,lextab="manytab") -lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:") - - diff --git a/ply/test/lex_module.py b/ply/test/lex_module.py deleted file mode 100644 index 8bdd3ed..0000000 --- a/ply/test/lex_module.py +++ /dev/null @@ -1,10 +0,0 @@ -# lex_module.py -# - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex -import lex_module_import -lex.lex(module=lex_module_import) -lex.runmain(data="3+4") diff --git a/ply/test/lex_module_import.py b/ply/test/lex_module_import.py deleted file mode 100644 index df42082..0000000 --- a/ply/test/lex_module_import.py +++ /dev/null @@ -1,42 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_module_import.py -# -# A lexer defined in a module, but built in lex_module.py -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - diff --git a/ply/test/lex_object.py b/ply/test/lex_object.py deleted file mode 100644 index 7e9f389..0000000 --- a/ply/test/lex_object.py +++ /dev/null @@ -1,55 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_object.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -class CalcLexer: - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self,t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(self,t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(self,t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - -calc = CalcLexer() - -# Build the lexer -lex.lex(object=calc) -lex.runmain(data="3+4") - - - - diff --git a/ply/test/lex_opt_alias.py b/ply/test/lex_opt_alias.py deleted file mode 100644 index 5d5ed4c..0000000 --- a/ply/test/lex_opt_alias.py +++ /dev/null @@ -1,54 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_opt_alias.py -# -# Tests ability to match up functions with states, aliases, and -# lexing tables. -# ----------------------------------------------------------------------------- - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -tokens = ( - 'NAME','NUMBER', - ) - -states = (('instdef','inclusive'),('spam','exclusive')) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -def t_instdef_spam_BITS(t): - r'[01-]+' - return t - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ANY_NUMBER = NUMBER - -t_ignore = " \t" -t_spam_ignore = t_ignore - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -t_spam_error = t_error - -# Build the lexer -import ply.lex as lex -lex.lex(optimize=1,lextab="aliastab") -lex.runmain(data="3+4") diff --git a/ply/test/lex_optimize.py b/ply/test/lex_optimize.py deleted file mode 100644 index 0e447e6..0000000 --- a/ply/test/lex_optimize.py +++ /dev/null @@ -1,50 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1) -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_optimize2.py b/ply/test/lex_optimize2.py deleted file mode 100644 index 64555f6..0000000 --- a/ply/test/lex_optimize2.py +++ /dev/null @@ -1,50 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize2.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1,lextab="opt2tab") -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_optimize3.py b/ply/test/lex_optimize3.py deleted file mode 100644 index c6c8cce..0000000 --- a/ply/test/lex_optimize3.py +++ /dev/null @@ -1,52 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_optimize3.py -# -# Writes table in a subdirectory structure. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex(optimize=1,lextab="lexdir.sub.calctab",outputdir="lexdir/sub") -lex.runmain(data="3+4") - - - diff --git a/ply/test/lex_re1.py b/ply/test/lex_re1.py deleted file mode 100644 index 5be7aef..0000000 --- a/ply/test/lex_re1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re1.py -# -# Bad regular expression in a string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_re2.py b/ply/test/lex_re2.py deleted file mode 100644 index 8dfb8e3..0000000 --- a/ply/test/lex_re2.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re2.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+?' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_re3.py b/ply/test/lex_re3.py deleted file mode 100644 index e179925..0000000 --- a/ply/test/lex_re3.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_re3.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "POUND", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' -t_POUND = r'#' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule1.py b/ply/test/lex_rule1.py deleted file mode 100644 index 0406c6f..0000000 --- a/ply/test/lex_rule1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule1.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = 1 - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule2.py b/ply/test/lex_rule2.py deleted file mode 100644 index 1c29d87..0000000 --- a/ply/test/lex_rule2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_rule2.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(): - r'\d+' - return t - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_rule3.py b/ply/test/lex_rule3.py deleted file mode 100644 index 9ea94da..0000000 --- a/ply/test/lex_rule3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule3.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t,s): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_state1.py b/ply/test/lex_state1.py deleted file mode 100644 index 7528c91..0000000 --- a/ply/test/lex_state1.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state1.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = 'comment' - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state2.py b/ply/test/lex_state2.py deleted file mode 100644 index 3aef69e..0000000 --- a/ply/test/lex_state2.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = ('comment','example') - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state3.py b/ply/test/lex_state3.py deleted file mode 100644 index 616e484..0000000 --- a/ply/test/lex_state3.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state3.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = ((comment, 'inclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state4.py b/ply/test/lex_state4.py deleted file mode 100644 index 1825016..0000000 --- a/ply/test/lex_state4.py +++ /dev/null @@ -1,41 +0,0 @@ -# lex_state4.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - -states = (('comment', 'exclsive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/ply/test/lex_state5.py b/ply/test/lex_state5.py deleted file mode 100644 index 4ce828e..0000000 --- a/ply/test/lex_state5.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state5.py -# -# Bad state declaration - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('comment', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_noerror.py b/ply/test/lex_state_noerror.py deleted file mode 100644 index 90bbea8..0000000 --- a/ply/test/lex_state_noerror.py +++ /dev/null @@ -1,39 +0,0 @@ -# lex_state_noerror.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_norule.py b/ply/test/lex_state_norule.py deleted file mode 100644 index 64ec6d3..0000000 --- a/ply/test/lex_state_norule.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state_norule.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_state_try.py b/ply/test/lex_state_try.py deleted file mode 100644 index fd5ba22..0000000 --- a/ply/test/lex_state_try.py +++ /dev/null @@ -1,45 +0,0 @@ -# lex_state_try.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = " \t" - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -t_comment_error = t_error -t_comment_ignore = t_ignore - -lex.lex() - -data = "3 + 4 /* This is a comment */ + 10" - -lex.runmain(data=data) diff --git a/ply/test/lex_token1.py b/ply/test/lex_token1.py deleted file mode 100644 index 6fca300..0000000 --- a/ply/test/lex_token1.py +++ /dev/null @@ -1,19 +0,0 @@ -# lex_token1.py -# -# Tests for absence of tokens variable - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token2.py b/ply/test/lex_token2.py deleted file mode 100644 index 6e65ab0..0000000 --- a/ply/test/lex_token2.py +++ /dev/null @@ -1,22 +0,0 @@ -# lex_token2.py -# -# Tests for tokens of wrong type - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = "PLUS MINUS NUMBER" - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - - -lex.lex() - - diff --git a/ply/test/lex_token3.py b/ply/test/lex_token3.py deleted file mode 100644 index 636452e..0000000 --- a/ply/test/lex_token3.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_token3.py -# -# tokens is right type, but is missing a token for one rule - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token4.py b/ply/test/lex_token4.py deleted file mode 100644 index 52947e9..0000000 --- a/ply/test/lex_token4.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_token4.py -# -# Bad token name - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "-", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/lex_token5.py b/ply/test/lex_token5.py deleted file mode 100644 index ef7a3c5..0000000 --- a/ply/test/lex_token5.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_token5.py -# -# Return a bad token name - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - t.type = "NUM" - return t - -def t_error(t): - pass - -lex.lex() -lex.input("1234") -t = lex.token() - - diff --git a/ply/test/lex_token_dup.py b/ply/test/lex_token_dup.py deleted file mode 100644 index 384f4e9..0000000 --- a/ply/test/lex_token_dup.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_token_dup.py -# -# Duplicate token name in tokens - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "MINUS" - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/ply/test/testlex.py b/ply/test/testlex.py deleted file mode 100755 index 1f7dd1b..0000000 --- a/ply/test/testlex.py +++ /dev/null @@ -1,606 +0,0 @@ -# testlex.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import imp -import warnings - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.lex - -def make_pymodule_path(filename): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = os.path.splitext(file) - - if sys.hexversion >= 0x3020000: - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename): - return os.path.exists(make_pymodule_path(filename)) - -def pymodule_out_remove(filename): - os.remove(make_pymodule_path(filename)) - -def check_expected(result,expected): - if sys.version_info[0] >= 3: - if isinstance(result,str): - result = result.encode('ascii') - if isinstance(expected,str): - expected = expected.encode('ascii') - resultlines = result.splitlines() - expectedlines = expected.splitlines() - - - if len(resultlines) != len(expectedlines): - return False - - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building lexers -class LexErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore',category=ResourceWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_lex_doc1(self): - self.assertRaises(SyntaxError,run_import,"lex_doc1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_doc1.py:18: No regular expression defined for rule 't_NUMBER'\n")) - def test_lex_dup1(self): - self.assertRaises(SyntaxError,run_import,"lex_dup1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup2(self): - self.assertRaises(SyntaxError,run_import,"lex_dup2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup3(self): - self.assertRaises(SyntaxError,run_import,"lex_dup3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_dup3.py:20: Rule t_NUMBER redefined. 
Previously defined on line 18\n" )) - - def test_lex_empty(self): - self.assertRaises(SyntaxError,run_import,"lex_empty") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No rules of the form t_rulename are defined\n" - "No rules defined for state 'INITIAL'\n")) - - def test_lex_error1(self): - run_import("lex_error1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No t_error rule is defined\n")) - - def test_lex_error2(self): - self.assertRaises(SyntaxError,run_import,"lex_error2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Rule 't_error' must be defined as a function\n") - ) - - def test_lex_error3(self): - self.assertRaises(SyntaxError,run_import,"lex_error3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_error3.py:20: Rule 't_error' requires an argument\n")) - - def test_lex_error4(self): - self.assertRaises(SyntaxError,run_import,"lex_error4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_error4.py:20: Rule 't_error' has too many arguments\n")) - - def test_lex_ignore(self): - self.assertRaises(SyntaxError,run_import,"lex_ignore") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_ignore.py:20: Rule 't_ignore' must be defined as a string\n")) - - def test_lex_ignore2(self): - run_import("lex_ignore2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "t_ignore contains a literal backslash '\\'\n")) - - - def test_lex_re1(self): - self.assertRaises(SyntaxError,run_import,"lex_re1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n")) - - def test_lex_re2(self): - self.assertRaises(SyntaxError,run_import,"lex_re2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Regular expression for rule 't_PLUS' matches empty string\n")) - - def test_lex_re3(self): - self.assertRaises(SyntaxError,run_import,"lex_re3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" - "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) - - def test_lex_rule1(self): - self.assertRaises(SyntaxError,run_import,"lex_rule1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "t_NUMBER not defined as a function or string\n")) - - def test_lex_rule2(self): - self.assertRaises(SyntaxError,run_import,"lex_rule2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_rule2.py:18: Rule 't_NUMBER' requires an argument\n")) - - def test_lex_rule3(self): - self.assertRaises(SyntaxError,run_import,"lex_rule3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "lex_rule3.py:18: Rule 't_NUMBER' has too many arguments\n")) - - - def test_lex_state1(self): - self.assertRaises(SyntaxError,run_import,"lex_state1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "states must be defined as a tuple or list\n")) - - def test_lex_state2(self): - self.assertRaises(SyntaxError,run_import,"lex_state2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n" - "Invalid state specifier 'example'. 
Must be a tuple (statename,'exclusive|inclusive')\n")) - - def test_lex_state3(self): - self.assertRaises(SyntaxError,run_import,"lex_state3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State name 1 must be a string\n" - "No rules defined for state 'example'\n")) - - def test_lex_state4(self): - self.assertRaises(SyntaxError,run_import,"lex_state4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State type for state comment must be 'inclusive' or 'exclusive'\n")) - - - def test_lex_state5(self): - self.assertRaises(SyntaxError,run_import,"lex_state5") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "State 'comment' already defined\n")) - - def test_lex_state_noerror(self): - run_import("lex_state_noerror") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No error rule is defined for exclusive state 'comment'\n")) - - def test_lex_state_norule(self): - self.assertRaises(SyntaxError,run_import,"lex_state_norule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No rules defined for state 'example'\n")) - - def test_lex_token1(self): - self.assertRaises(SyntaxError,run_import,"lex_token1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No token list is defined\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token2(self): - self.assertRaises(SyntaxError,run_import,"lex_token2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "tokens must be a list or tuple\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token3(self): - self.assertRaises(SyntaxError,run_import,"lex_token3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Rule 't_MINUS' defined for an unspecified token MINUS\n")) - - - def test_lex_token4(self): - self.assertRaises(SyntaxError,run_import,"lex_token4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Bad token name '-'\n")) - - - def test_lex_token5(self): - try: - run_import("lex_token5") - except ply.lex.LexError: - e = sys.exc_info()[1] - self.assert_(check_expected(str(e),"lex_token5.py:19: Rule 't_NUMBER' returned an unknown token type 'NUM'")) - - def test_lex_token_dup(self): - run_import("lex_token_dup") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Token 'MINUS' multiply defined\n")) - - - def test_lex_literal1(self): - self.assertRaises(SyntaxError,run_import,"lex_literal1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid literal '**'. Must be a single character\n")) - - def test_lex_literal2(self): - self.assertRaises(SyntaxError,run_import,"lex_literal2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Invalid literals specification. 
literals must be a sequence of characters\n")) - -import os -import subprocess -import shutil - -# Tests related to various build options associated with lexers -class LexBuildOptionTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - try: - shutil.rmtree("lexdir") - except OSError: - pass - - def test_lex_module(self): - run_import("lex_module") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_object(self): - run_import("lex_object") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_closure(self): - run_import("lex_closure") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - def test_lex_optimize(self): - try: - os.remove("lextab.py") - except OSError: - pass - try: - os.remove("lextab.pyc") - except OSError: - pass - try: - os.remove("lextab.pyo") - except OSError: - pass - run_import("lex_optimize") - - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.py")) - - - p = subprocess.Popen([sys.executable,'-O','lex_optimize.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lextab.pyo")) - - pymodule_out_remove("lextab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lextab.pyo")) - try: - os.remove("lextab.py") - except OSError: - pass - try: - pymodule_out_remove("lextab.pyc") - except OSError: - pass - try: - pymodule_out_remove("lextab.pyo") - except OSError: - pass - - def test_lex_optimize2(self): - try: - os.remove("opt2tab.py") - except OSError: - pass - try: - os.remove("opt2tab.pyc") - except OSError: - pass - try: - os.remove("opt2tab.pyo") - except OSError: - pass - run_import("lex_optimize2") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_optimize2.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("opt2tab.pyo")) - pymodule_out_remove("opt2tab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize2.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("opt2tab.pyo")) - try: - os.remove("opt2tab.py") - except OSError: - pass - try: - pymodule_out_remove("opt2tab.pyc") - except OSError: - pass - try: - pymodule_out_remove("opt2tab.pyo") - except OSError: - pass - - def test_lex_optimize3(self): - try: - shutil.rmtree("lexdir") - except OSError: - pass - - os.mkdir("lexdir") - os.mkdir("lexdir/sub") 
- open("lexdir/__init__.py","w").write("") - open("lexdir/sub/__init__.py","w").write("") - run_import("lex_optimize3") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_optimize3.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo")) - pymodule_out_remove("lexdir/sub/calctab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_optimize3.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo")) - try: - shutil.rmtree("lexdir") - except OSError: - pass - - def test_lex_opt_alias(self): - try: - os.remove("aliastab.py") - except OSError: - pass - try: - os.remove("aliastab.pyc") - except OSError: - pass - try: - os.remove("aliastab.pyo") - except OSError: - pass - run_import("lex_opt_alias") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_opt_alias.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("aliastab.pyo")) - pymodule_out_remove("aliastab.pyo") - p = subprocess.Popen([sys.executable,'-OO','lex_opt_alias.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(NUMBER,3,1,0)\n" - "(+,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - self.assert_(pymodule_out_exists("aliastab.pyo")) - try: - os.remove("aliastab.py") - except OSError: - pass - try: - pymodule_out_remove("aliastab.pyc") - except OSError: - pass - try: - pymodule_out_remove("aliastab.pyo") - except OSError: - pass - - def test_lex_many_tokens(self): - try: - os.remove("manytab.py") - except OSError: - pass - try: - os.remove("manytab.pyc") - except OSError: - pass - try: - os.remove("manytab.pyo") - except OSError: - pass - run_import("lex_many_tokens") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) - - self.assert_(os.path.exists("manytab.py")) - - p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) - - self.assert_(pymodule_out_exists("manytab.pyo")) - pymodule_out_remove("manytab.pyo") - try: - os.remove("manytab.py") - except OSError: - pass - try: - os.remove("manytab.pyc") - except OSError: - pass - try: - os.remove("manytab.pyo") - except OSError: - pass - -# Tests related to run-time behavior of lexers -class LexRunTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout 
= StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - - def test_lex_hedit(self): - run_import("lex_hedit") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(H_EDIT_DESCRIPTOR,'abc',1,0)\n" - "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n" - "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")) - - def test_lex_state_try(self): - run_import("lex_state_try") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "(NUMBER,'3',1,0)\n" - "(PLUS,'+',1,2)\n" - "(NUMBER,'4',1,4)\n" - "Entering comment state\n" - "comment body LexToken(body_part,'This is a comment */',1,9)\n" - "(PLUS,'+',1,30)\n" - "(NUMBER,'10',1,32)\n" - )) - - - -unittest.main() diff --git a/ply/test/testyacc.py b/ply/test/testyacc.py deleted file mode 100644 index 2b06b44..0000000 --- a/ply/test/testyacc.py +++ /dev/null @@ -1,347 +0,0 @@ -# testyacc.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import warnings - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.yacc -import imp - -def make_pymodule_path(filename): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = os.path.splitext(file) - - if sys.hexversion >= 0x3020000: - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename): - return os.path.exists(make_pymodule_path(filename)) - -def pymodule_out_remove(filename): - os.remove(make_pymodule_path(filename)) - - -def check_expected(result,expected): - resultlines = [] - for line in result.splitlines(): - if line.startswith("WARNING: "): - line = line[9:] - elif line.startswith("ERROR: "): - line = line[7:] - resultlines.append(line) - - expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): - return False - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building parsers -class YaccErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - try: - os.remove("parsetab.py") - pymodule_out_remove("parsetab.pyc") - except OSError: - pass - - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore',category=ResourceWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_yacc_badargs(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badargs") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badargs.py:23: Rule 'p_statement_assign' has too many arguments\n" - "yacc_badargs.py:27: Rule 'p_statement_expr' requires an argument\n" - )) - def test_yacc_badid(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badid") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badid.py:32: Illegal name 'bad&rule' in rule 'statement'\n" - "yacc_badid.py:36: Illegal rule name 'bad&rule'\n" - )) - - def test_yacc_badprec(self): - try: - run_import("yacc_badprec") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "precedence must be a list or tuple\n" - )) - def test_yacc_badprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badprec2") - result = 
sys.stderr.getvalue() - self.assert_(check_expected(result, - "Bad precedence table\n" - )) - - def test_yacc_badprec3(self): - run_import("yacc_badprec3") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Precedence already specified for terminal 'MINUS'\n" - "Generating LALR tables\n" - - )) - - def test_yacc_badrule(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badrule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_badrule.py:24: Syntax error. Expected ':'\n" - "yacc_badrule.py:28: Syntax error in rule 'statement'\n" - "yacc_badrule.py:33: Syntax error. Expected ':'\n" - "yacc_badrule.py:42: Syntax error. Expected ':'\n" - )) - - def test_yacc_badtok(self): - try: - run_import("yacc_badtok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "tokens must be a list or tuple\n")) - - def test_yacc_dup(self): - run_import("yacc_dup") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_dup.py:27: Function p_statement redefined. Previously defined on line 23\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - - )) - def test_yacc_error1(self): - try: - run_import("yacc_error1") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error1.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error2(self): - try: - run_import("yacc_error2") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error2.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error3(self): - try: - run_import("yacc_error3") - except ply.yacc.YaccError: - e = sys.exc_info()[1] - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "'p_error' defined, but is not a function or method\n")) - - def test_yacc_error4(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_error4") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_error4.py:62: Illegal rule name 'error'. 
Already defined as a token\n" - )) - - def test_yacc_inf(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Token 'NUMBER' defined, but not used\n" - "There is 1 unused token\n" - "Infinite recursion detected for symbol 'statement'\n" - "Infinite recursion detected for symbol 'expression'\n" - )) - def test_yacc_literal(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_literal") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_literal.py:36: Literal token '**' in rule 'expression' may only be a single character\n" - )) - def test_yacc_misplaced(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_misplaced") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_misplaced.py:32: Misplaced '|'\n" - )) - - def test_yacc_missing1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_missing1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_missing1.py:24: Symbol 'location' used, but not defined as a token or a rule\n" - )) - - def test_yacc_nested(self): - run_import("yacc_nested") - result = sys.stdout.getvalue() - self.assert_(check_expected(result, - "A\n" - "A\n" - "A\n", - )) - - def test_yacc_nodoc(self): - run_import("yacc_nodoc") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_nodoc.py:27: No documentation string specified in function 'p_statement_expr' (ignored)\n" - "Generating LALR tables\n" - )) - - def test_yacc_noerror(self): - run_import("yacc_noerror") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - )) - - def test_yacc_nop(self): - run_import("yacc_nop") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_nop.py:27: Possible grammar rule 'statement_expr' defined without p_ prefix\n" - "Generating LALR tables\n" - )) - - def test_yacc_notfunc(self): - run_import("yacc_notfunc") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "'p_statement_assign' not defined as a function\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - )) - def test_yacc_notok(self): - try: - run_import("yacc_notok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "No token list is defined\n")) - - def test_yacc_rr(self): - run_import("yacc_rr") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - "1 reduce/reduce conflict\n" - "reduce/reduce conflict in state 15 resolved using rule (statement -> NAME EQUALS NUMBER)\n" - "rejected rule (expression -> NUMBER) in state 15\n" - - )) - - def test_yacc_rr_unused(self): - run_import("yacc_rr_unused") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - "3 reduce/reduce conflicts\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule4 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule4 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "Rule (rule5 -> A) is never reduced\n" - )) - - def test_yacc_simple(self): - run_import("yacc_simple") - result = 
sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - )) - def test_yacc_sr(self): - run_import("yacc_sr") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Generating LALR tables\n" - "20 shift/reduce conflicts\n" - )) - - def test_yacc_term1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_term1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_term1.py:24: Illegal rule name 'NUMBER'. Already defined as a token\n" - )) - - def test_yacc_unused(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_unused.py:62: Symbol 'COMMA' used, but not defined as a token or a rule\n" - "Symbol 'COMMA' is unreachable\n" - "Symbol 'exprlist' is unreachable\n" - )) - def test_yacc_unused_rule(self): - run_import("yacc_unused_rule") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_unused_rule.py:62: Rule 'integer' defined, but not used\n" - "There is 1 unused rule\n" - "Symbol 'integer' is unreachable\n" - "Generating LALR tables\n" - )) - - def test_yacc_uprec(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_uprec.py:37: Nothing known about the precedence of 'UMINUS'\n" - )) - - def test_yacc_uprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec2") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "yacc_uprec2.py:37: Syntax error. Nothing follows %prec\n" - )) - - def test_yacc_prec1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_prec1") - result = sys.stderr.getvalue() - self.assert_(check_expected(result, - "Precedence rule 'left' defined for unknown symbol '+'\n" - "Precedence rule 'left' defined for unknown symbol '*'\n" - "Precedence rule 'left' defined for unknown symbol '-'\n" - "Precedence rule 'left' defined for unknown symbol '/'\n" - )) - - - -unittest.main() diff --git a/ply/test/yacc_badargs.py b/ply/test/yacc_badargs.py deleted file mode 100644 index 9a1d03f..0000000 --- a/ply/test/yacc_badargs.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badargs.py -# -# Rules with wrong # args -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t,s): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - 
except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badid.py b/ply/test/yacc_badid.py deleted file mode 100644 index e4b9f5e..0000000 --- a/ply/test/yacc_badid.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badid.py -# -# Attempt to define a rule with a bad-identifier name -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_statement_expr2(t): - 'statement : bad&rule' - pass - -def p_badrule(t): - 'bad&rule : expression' - pass - - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec.py b/ply/test/yacc_badprec.py deleted file mode 100644 index 3013bb6..0000000 --- a/ply/test/yacc_badprec.py +++ /dev/null @@ -1,64 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec.py -# -# Bad precedence specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = "blah" - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec2.py b/ply/test/yacc_badprec2.py deleted file mode 100644 index 83093b4..0000000 --- a/ply/test/yacc_badprec2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec2.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - 42, - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badprec3.py b/ply/test/yacc_badprec3.py deleted file mode 100644 index d925ecd..0000000 --- a/ply/test/yacc_badprec3.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec3.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE','MINUS'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badrule.py b/ply/test/yacc_badrule.py deleted file mode 100644 index 92af646..0000000 --- a/ply/test/yacc_badrule.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badrule.py -# -# Syntax problems in the rule strings -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression: MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_badtok.py b/ply/test/yacc_badtok.py deleted file mode 100644 index fc4afe1..0000000 --- a/ply/test/yacc_badtok.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badtok.py -# -# A grammar, but tokens is a bad datatype -# ----------------------------------------------------------------------------- - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = "Hello" - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_dup.py b/ply/test/yacc_dup.py deleted file mode 100644 index 309ba32..0000000 --- a/ply/test/yacc_dup.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_dup.py -# -# Duplicated rule name -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error1.py b/ply/test/yacc_error1.py deleted file mode 100644 index 10ac6a9..0000000 --- a/ply/test/yacc_error1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t,s): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error2.py b/ply/test/yacc_error2.py deleted file mode 100644 index 7591418..0000000 --- a/ply/test/yacc_error2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error2.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error3.py b/ply/test/yacc_error3.py deleted file mode 100644 index 4604a48..0000000 --- a/ply/test/yacc_error3.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error3.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -p_error = "blah" - -yacc.yacc() - - - - diff --git a/ply/test/yacc_error4.py b/ply/test/yacc_error4.py deleted file mode 100644 index 9c550cd..0000000 --- a/ply/test/yacc_error4.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error4.py -# -# Attempt to define a rule named 'error' -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error_handler(t): - 'error : NAME' - pass - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/ply/test/yacc_inf.py b/ply/test/yacc_inf.py deleted file mode 100644 index efd3612..0000000 --- a/ply/test/yacc_inf.py +++ /dev/null @@ -1,56 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_inf.py -# -# Infinite recursion -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_literal.py b/ply/test/yacc_literal.py deleted file mode 100644 index 0d62803..0000000 --- a/ply/test/yacc_literal.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_literal.py -# -# Grammar with bad literal characters -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression - | expression '**' expression ''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_misplaced.py b/ply/test/yacc_misplaced.py deleted file mode 100644 index 9159b01..0000000 --- a/ply/test/yacc_misplaced.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_misplaced.py -# -# A misplaced | in grammar rules -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - ''' | expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_missing1.py b/ply/test/yacc_missing1.py deleted file mode 100644 index d1b5105..0000000 --- a/ply/test/yacc_missing1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_missing1.py -# -# Grammar with a missing rule -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : location EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_nested.py b/ply/test/yacc_nested.py deleted file mode 100644 index a1b061e..0000000 --- a/ply/test/yacc_nested.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") - -from ply import lex, yacc - -t_A = 'A' -t_B = 'B' -t_C = 'C' - -tokens = ('A', 'B', 'C') - -the_lexer = lex.lex() - -def t_error(t): - pass - -def p_error(p): - pass - -def p_start(t): - '''start : A nest C''' - pass - -def p_nest(t): - '''nest : B''' - print(t[-1]) - -the_parser = yacc.yacc(debug = False, write_tables = False) - -the_parser.parse('ABC', the_lexer) -the_parser.parse('ABC', the_lexer, tracking=True) -the_parser.parse('ABC', the_lexer, tracking=True, debug=1) diff --git a/ply/test/yacc_nodoc.py b/ply/test/yacc_nodoc.py deleted file mode 100644 index 0f61920..0000000 --- a/ply/test/yacc_nodoc.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nodoc.py -# -# Rule with a missing doc-string -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_noerror.py b/ply/test/yacc_noerror.py deleted file mode 100644 index b38c758..0000000 --- a/ply/test/yacc_noerror.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_noerror.py -# -# No p_error() rule defined. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - - -yacc.yacc() - - - - diff --git a/ply/test/yacc_nop.py b/ply/test/yacc_nop.py deleted file mode 100644 index 789a9cf..0000000 --- a/ply/test/yacc_nop.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nop.py -# -# Possible grammar rule defined without p_ prefix -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_notfunc.py b/ply/test/yacc_notfunc.py deleted file mode 100644 index 5093a74..0000000 --- a/ply/test/yacc_notfunc.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notfunc.py -# -# p_rule not defined as a function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -p_statement_assign = "Blah" - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_notok.py b/ply/test/yacc_notok.py deleted file mode 100644 index cff55a8..0000000 --- a/ply/test/yacc_notok.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notok.py -# -# A grammar, but we forgot to import the tokens list -# ----------------------------------------------------------------------------- - -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_prec1.py b/ply/test/yacc_prec1.py deleted file mode 100644 index 2ca6afc..0000000 --- a/ply/test/yacc_prec1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_prec1.py -# -# Tests case where precedence specifier doesn't match up to terminals -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_rr.py b/ply/test/yacc_rr.py deleted file mode 100644 index e7336c2..0000000 --- a/ply/test/yacc_rr.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr.py -# -# A grammar with a reduce/reduce conflict -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_assign_2(t): - 'statement : NAME EQUALS NUMBER' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_rr_unused.py b/ply/test/yacc_rr_unused.py deleted file mode 100644 index 1ca5f7e..0000000 --- a/ply/test/yacc_rr_unused.py +++ /dev/null @@ -1,30 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr_unused.py -# -# A grammar with reduce/reduce conflicts and a rule that never -# gets reduced. -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = ('A', 'B', 'C') - -def p_grammar(p): - ''' - rule1 : rule2 B - | rule2 C - - rule2 : rule3 B - | rule4 - | rule5 - - rule3 : A - - rule4 : A - - rule5 : A - ''' - -yacc.yacc() diff --git a/ply/test/yacc_simple.py b/ply/test/yacc_simple.py deleted file mode 100644 index bd989f4..0000000 --- a/ply/test/yacc_simple.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_simple.py -# -# A simple, properly specifier grammar -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_sr.py b/ply/test/yacc_sr.py deleted file mode 100644 index 69a1e9c..0000000 --- a/ply/test/yacc_sr.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_sr.py -# -# A grammar with shift-reduce conflicts -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_term1.py b/ply/test/yacc_term1.py deleted file mode 100644 index eaa36e9..0000000 --- a/ply/test/yacc_term1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_term1.py -# -# Terminal used on the left-hand-side -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'NUMBER : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_unused.py b/ply/test/yacc_unused.py deleted file mode 100644 index 55b677b..0000000 --- a/ply/test/yacc_unused.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused.py -# -# A grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_expr_list(t): - 'exprlist : exprlist COMMA expression' - pass - -def p_expr_list_2(t): - 'exprlist : expression' - pass - - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_unused_rule.py b/ply/test/yacc_unused_rule.py deleted file mode 100644 index 4868ef8..0000000 --- a/ply/test/yacc_unused_rule.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused_rule.py -# -# Grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_integer(t): - 'integer : NUMBER' - t[0] = t[1] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_uprec.py b/ply/test/yacc_uprec.py deleted file mode 100644 index 569adb8..0000000 --- a/ply/test/yacc_uprec.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/ply/test/yacc_uprec2.py b/ply/test/yacc_uprec2.py deleted file mode 100644 index 73274bf..0000000 --- a/ply/test/yacc_uprec2.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec2.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/theme/base.css b/theme/base.css deleted file mode 100644 index 22c4ee2..0000000 --- a/theme/base.css +++ /dev/null @@ -1,194 +0,0 @@ - -html { - display: block; -} - -body { - font-family: 'Ubuntu',Tahoma,sans-serif; - padding-top: 40px; - padding-bottom: 40px; - font-size: 15px; - line-height: 150%; - margin: 0; - color: #333333; - background-color: #ffffff; - display: block; - margin-left: 250px; - margin-right: 50px; -}; - -.container{ - width:940px; - margin-right: auto; - margin-left: auto; - display: block; -}; - -.navbar { - z-index: 1; - overflow: visible; - color: #ffffff; - display: block; -} - -.navbar div { - display: block; - margin-left: 5px; - margin-right: 5px; -} - -.navbar-fixed-top { - width:210px; - display: block; - position: fixed; - padding-top: 0px; - top: 0; - height: 100%; - right: 0; - left: 0; - margin-bottom: 0; - background-color: #d44413; 
- border: 1px solid #c64012; - font-size: 15px; - font-weight: 200; - color: #ffffff; - text-shadow: 0 1px 0 #ce4213; - padding: 10px 20px 10px; - margin-left: -20px; - //overflow:scroll; - //overflow-x:hidden; -} -/* -.navbar ul { - font-size: 15px; -}; -*/ -h1, h2, h3, h4, h5, h6 { - display: block; - margin: 10px 0; - font-family: inherit; - font-weight: bold; - line-height: 1; - color: inherit; - text-rendering: optimizelegibility; -} - -p { - margin: 0 0 10px; - display: block; -} - -pre { - #margin-left: 20px; - display: block; - padding: 9.5px; - margin: 0 0 10px; - font-size: 13px; - line-height: 20px; - word-break: break-all; - word-wrap: break-word; - white-space: pre; - white-space: pre-wrap; - background-color: #f5f5f5; - border: 1px solid #ccc; - border: 1px solid rgba(0, 0, 0, 0.15); - border-radius: 4px; -} - - - -.code-function { - text-decoration:none; - color:#09857e; - font-weight:bold; -} - -.code-type { - text-decoration:none; - color:#376d0a; - font-weight:bold; -} - -.code-argument { - text-decoration:none; - color:#B80000; - font-weight:bold; -} - -.code-number { - text-decoration:none; - color:#007b00; -} - -.code-keyword { - text-decoration:none; - color:#215eb8; - font-weight:bold; -} -.code-storage-keyword { - text-decoration:none; - color:#466cb4; -} - -.code-doxygen { - text-decoration:none; - color:#bf3e00; - font-weight:bold; -} - -.code-comment { - text-decoration:none; - color:#b704b5; -} - -.code-preproc { - text-decoration:none; - color:#ac0000; -} - -.code-text-quote { - text-decoration:none; - color:#008e00; -} -.code-number { - text-decoration:none; - color:#007b00; -} -.code-member { - text-decoration:none; - color:#7c5406; -} -.code-input-function { - text-decoration:none; - color:#B80000; - font-weight:bold; -} -.code-function-name { - text-decoration:none; - color:#09857e; - font-weight:bold; -} -.code-function-system { - text-decoration:none; - color:#acaa00; -} -.code-generic-define { - text-decoration:none; - color:#3c850b; -} -.code-macro { - text-decoration:none; - color:#3c850b; -} -.code-operator { - text-decoration:none; - color:#1633a3; -} -.code-keyword { - text-decoration:none; - color:#466cb4; -} -.code-class { - text-decoration:none; - color:#006cb4; -} diff --git a/theme/menu.css b/theme/menu.css deleted file mode 100644 index 26ed389..0000000 --- a/theme/menu.css +++ /dev/null @@ -1,146 +0,0 @@ -/* CSS Document */ - -/*----------------MENU-----------------*/ -div#menu div{ - margin-top: 0px; - background: #6699FF; -} -/* permet l'affichage du haut du menu*/ -div#menu h2{ - color: #000000; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; - text-align:left; - margin: 0; - padding: 3px; - padding-left: 6px; - background: #1a62db; -} -div#menu h3{ - margin: 0; - padding: 6px; - background: #6699FF; -} - -div#menu a{ - color: #000000; - bgcolor=#6699FF; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; -} -div#menu li { - position: relative; - list-style:none; - margin:0px; - border-bottom: 1px solid #0008ab; -} -div#menu li.sousmenu { - background: url(sous_menu.gif) 95% 50% no-repeat; -} -div#menu li:hover { - background: #0008ab; -} -div#menu li.sousmenu:hover { - background: #0008ab; -} -div#menu ul ul { - position: absolute; - top: 0px; -} - -/*TAILLE PREMIERE COLONNE*/ -div#menu { - float: center; - width: 200px; - text-align:left; -} -div#menu ul { - margin: 0; - padding: 0; - width: 200px; - background: #6699FF; - border: 0px solid; -} -div#menu ul ul { - left: 199px; - display:none; - background: #FFFFFF; -} -div#menu li a { - display: block; - 
padding: 2px 0px 2px 4px; - text-decoration: none; - width: 191px; - border-left: 3px solid #6699FF; -} -div#menu form { - border-left: 8px solid #6699FF; - background: #6699FF; - FONT-FAMILY: Arial; - margin:0px; - FONT-SIZE: 8pt; -} -div#menu texte { - border-left: 8px solid #6699FF; - FONT-FAMILY: Arial; - FONT-SIZE: 9pt; - font-weight:bold; - border-bottom: 1px solid #6699FF; -} - -/*TAILLE DEUXIEME COLONE*/ - -div#menu ul.niveau1 ul { - left: 200px; - height: 500px; - border: 1px solid #0008ab; - background: #1a62db; - /* - overflow:scroll; - overflow-y:auto; - overflow-x:hidden; - */ -} -div#menu ul.niveau1 li { - background: #6699FF; -} -div#menu ul.niveau1 li.sousmenu:hover ul.niveau2 { - width:219px; - display:block; -} - - - -/*TAILLE TROISIEME COLONNE*/ -div#menu ul.niveau2 ul { - left: 219px; - height: 500px; -} -div#menu ul.niveau2 li a { - width: 200px; -} -div#menu ul.niveau2 li.sousmenu:hover ul.niveau3 { - width:10em; - display:block; -} - -/*TAILLE Quatrieme COLONNE*/ -div#menu ul.niveau3 ul { - left: 369px; - height: 500px; -} -div#menu ul.niveau3 li a { - width: 200px; -} -div#menu ul.niveau3 li.sousmenu:hover ul.niveau4 { - width:10em; - display:block; -} -/*TAILLE DEUXIEME COLONE BIS????*/ - -/*COULEUR DES BORDURES*/ -div#menu li a:hover { - border-left-color: #000ADE; - background: #6699FF; - font-weight:bold; -} \ No newline at end of file
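
The yacc_* modules removed above are all variants of the same PLY calculator grammar; several of them are deliberately malformed (a terminal on the left-hand side, unused rules, a bad or missing %prec specifier) so that PLY's grammar validation warnings can be exercised. For context, here is a minimal sketch of how a well-formed variant of that grammar is typically driven, assuming the calclex lexer module that all of these test files import; the grammar module name below is illustrative, not a file guaranteed to exist in this tree.

    # sketch only: drive a PLY calculator grammar interactively
    import sys

    if ".." not in sys.path:
        sys.path.insert(0, "..")

    import ply.lex as lex
    import ply.yacc as yacc

    import calclex        # token definitions shared by all the test grammars
    import yacc_simple    # illustrative name: any grammar module that builds cleanly

    lexer = lex.lex(module=calclex)          # build the lexer from calclex's t_* rules
    parser = yacc.yacc(module=yacc_simple)   # build the LALR tables from the p_* rules

    while True:
        try:
            s = input("calc > ")             # e.g. "x = 3 * 4" or "x + 1"
        except EOFError:
            break
        if s:
            parser.parse(s, lexer=lexer)     # p_statement_expr prints expression results

Importing one of the broken variants instead (yacc_term1, yacc_unused_rule, yacc_uprec, yacc_uprec2) still calls yacc.yacc() at import time, but is expected to emit grammar warnings or errors rather than yield a usable parser, which is what the PLY test suite checks for.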