[DEV] add ply and cppHeaderParser and basic markdown generator of doc

Edouard DUPIN 2013-11-30 18:34:20 +01:00
parent e13bf9da44
commit 50caa0371f
147 changed files with 28602 additions and 21 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,9 @@
# CppHeaderParser package
# Author: Jashua Cloutier (contact via sourceforge username:senexcanis)
import sys
if sys.version_info[0] == 2:
    from CppHeaderParser import *
else:
    from CppHeaderParser3 import *
#__all__ = ['CppHeaderParser']
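With this dispatch in place, client code imports the package the same way under either interpreter. A minimal smoke test (editorial sketch; assumes the package directory is on sys.path):

    # Hypothetical check of the version dispatch above.
    import CppHeaderParser
    # Resolves to CppHeaderParser (Python 2) or CppHeaderParser3 (Python 3);
    # both variants are expected to export the CppHeader entry point.
    print(hasattr(CppHeaderParser, "CppHeader"))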

File diff suppressed because it is too large Load Diff


@@ -0,0 +1,76 @@
#include <vector>
#include <string>

#define DEF_1 1
#define OS_NAME "Linux"

using namespace std;
class SampleClass
{
public:
    SampleClass();
    /*!
     * Method 1
     */
    string meth1();

    ///
    /// Method 2 description
    ///
    /// @param v1 Variable 1
    ///
    int meth2(int v1);

    /**
     * Method 3 description
     *
     * \param v1 Variable 1
     * \param v2 Variable 2
     */
    void meth3(const string & v1, vector<string> & v2);

    /**********************************
     * Method 4 description
     *
     * @return Return value
     *********************************/
    unsigned int meth4();
private:
    void * meth5(){return NULL};

    /// prop1 description
    string prop1;
    //! prop5 description
    int prop5;
};
namespace Alpha
{
    class AlphaClass
    {
    public:
        AlphaClass();

        void alphaMethod();

        string alphaString;
    };

    namespace Omega
    {
        class OmegaClass
        {
        public:
            OmegaClass();

            string omegaString;
        };
    };
}

int sampleFreeFunction(int i)
{
    return i + 1;
}

int anotherFreeFunction(void);
}


@@ -0,0 +1,63 @@
#!/usr/bin/python
import sys
sys.path = ["../"] + sys.path
import CppHeaderParser
try:
    cppHeader = CppHeaderParser.CppHeader("SampleClass.h")
except CppHeaderParser.CppParseError, e:
    print e
    sys.exit(1)
print "CppHeaderParser view of %s"%cppHeader
sampleClass = cppHeader.classes["SampleClass"]
print "Number of public methods %d"%(len(sampleClass["methods"]["public"]))
print "Number of private properties %d"%(len(sampleClass["properties"]["private"]))
meth3 = [m for m in sampleClass["methods"]["public"] if m["name"] == "meth3"][0] #get meth3
meth3ParamTypes = [t["type"] for t in meth3["parameters"]] #get meth3s parameters
print "Parameter Types for public method meth3 %s"%(meth3ParamTypes)
print "\nReturn type for meth1:"
print cppHeader.classes["SampleClass"]["methods"]["public"][1]["rtnType"]
print "\nDoxygen for meth2:"
print cppHeader.classes["SampleClass"]["methods"]["public"][2]["doxygen"]
print "\nParameters for meth3:"
print cppHeader.classes["SampleClass"]["methods"]["public"][3]["parameters"]
print "\nDoxygen for meth4:"
print cppHeader.classes["SampleClass"]["methods"]["public"][4]["doxygen"]
print "\nReturn type for meth5:"
print cppHeader.classes["SampleClass"]["methods"]["private"][0]["rtnType"]
print "\nDoxygen type for prop1:"
print cppHeader.classes["SampleClass"]["properties"]["private"][0]["doxygen"]
print "\nType for prop5:"
print cppHeader.classes["SampleClass"]["properties"]["private"][1]["type"]
print "\nNamespace for AlphaClass is:"
print cppHeader.classes["AlphaClass"]["namespace"]
print "\nReturn type for alphaMethod is:"
print cppHeader.classes["AlphaClass"]["methods"]["public"][0]["rtnType"]
print "\nNamespace for OmegaClass is:"
print cppHeader.classes["OmegaClass"]["namespace"]
print "\nType for omegaString is:"
print cppHeader.classes["AlphaClass"]["properties"]["public"][0]["type"]
print "\nFree functions are:"
for func in cppHeader.functions:
    print " %s"%func["name"]
print "\n#includes are:"
for incl in cppHeader.includes:
    print " %s"%incl
print "\n#defines are:"
for define in cppHeader.defines:
    print " %s"%define
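Note that the script above uses Python 2 syntax ("print" statements and the "except ..., e" form). For reference, a minimal Python 3 rendering of the error handling would be:

    try:
        cppHeader = CppHeaderParser.CppHeader("SampleClass.h")
    except CppHeaderParser.CppParseError as e:
        print(e)
        sys.exit(1)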


@@ -0,0 +1,669 @@
#!/usr/bin/python
#
# Author: Jashua R. Cloutier (contact via sourceforge username:senexcanis)
#
# Copyright (C) 2010, Jashua R. Cloutier
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name of Jashua R. Cloutier nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
#
# The CppHeaderParser.py script is written in Python 2.4 and released to
# the open source community for continuous improvements under the BSD
# 2.0 new license, which can be found at:
#
# http://www.opensource.org/licenses/bsd-license.php
#
"""Parse C++ header files and generate a data structure
representing the class
"""
import ply.lex as lex
import os
import sys
import re
import inspect
def lineno():
    """Returns the current line number in our program."""
    return inspect.currentframe().f_back.f_lineno
__version__ = "1.9"
version = "1.9"
tokens = [
    'NUMBER',
    'NAME',
    'OPEN_PAREN',
    'CLOSE_PAREN',
    'OPEN_BRACE',
    'CLOSE_BRACE',
    'COLON',
    'SEMI_COLON',
    'COMMA',
    'COMMENT_SINGLELINE',
    'COMMENT_MULTILINE',
    'PRECOMP_MACRO',
    'PRECOMP_MACRO_CONT',
    'ASTERISK',
    'AMPERSTAND',
    'EQUALS',
    'MINUS',
    'PLUS',
    'DIVIDE',
    'CHAR_LITERAL',
    'STRING_LITERAL',
    'OPERATOR_DIVIDE_OVERLOAD',
    'NEW_LINE',
]
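# Editorial note (illustrative only): with the rules below, a declaration
# such as
#   int meth2(int v1);
# is expected to lex into NAME(int) NAME(meth2) OPEN_PAREN NAME(int)
# NAME(v1) CLOSE_PAREN SEMI_COLON, the flat "name stack" shape that
# evaluate_stack() consumes further down.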
t_ignore = " \t\r[].|!?%@"
t_NUMBER = r'[0-9][0-9XxA-Fa-f]*'
t_NAME = r'[<>A-Za-z_~][A-Za-z0-9_]*'
t_OPERATOR_DIVIDE_OVERLOAD = r'/='
t_OPEN_PAREN = r'\('
t_CLOSE_PAREN = r'\)'
t_OPEN_BRACE = r'{'
t_CLOSE_BRACE = r'}'
t_SEMI_COLON = r';'
t_COLON = r':'
t_COMMA = r','
t_PRECOMP_MACRO = r'\#.*'
t_PRECOMP_MACRO_CONT = r'.*\\\n'
def t_COMMENT_SINGLELINE(t):
    r'\/\/.*\n'
    global doxygenCommentCache
    if t.value.startswith("///") or t.value.startswith("//!"):
        if doxygenCommentCache:
            doxygenCommentCache += "\n"
        if t.value.endswith("\n"):
            doxygenCommentCache += t.value[:-1]
        else:
            doxygenCommentCache += t.value
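# Editorial note: two consecutive lines "/// a" then "/// b" should leave
# doxygenCommentCache == "/// a\n/// b"; the cache is attached to the next
# declaration and then cleared in evaluate_stack().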
t_ASTERISK = r'\*'
t_MINUS = r'\-'
t_PLUS = r'\+'
t_DIVIDE = r'/[^/]'
t_AMPERSTAND = r'&'
t_EQUALS = r'='
t_CHAR_LITERAL = "'.'"
#found at http://wordaligned.org/articles/string-literals-and-regular-expressions
#TODO: This does not work with the string "bla \" bla"
t_STRING_LITERAL = r'"([^"\\]|\\.)*"'
#Found at http://ostermiller.org/findcomment.html
def t_COMMENT_MULTILINE(t):
    r'/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/'
    global doxygenCommentCache
    if t.value.startswith("/**") or t.value.startswith("/*!"):
        #not sure why, but get double new lines
        v = t.value.replace("\n\n", "\n")
        #strip prefixing whitespace
        v = re.sub("\n[\s]+\*", "\n*", v)
        doxygenCommentCache += v

def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(v):
    print "Lex error: ", v
lex.lex()
debug = 0
supportedAccessSpecifier = [
    'public',
    'protected',
    'private'
]
doxygenCommentCache = ""
def is_namespace(nameStack):
    """Determines if a namespace is being specified"""
    if len(nameStack) == 0:
        return False
    if nameStack[0] == "namespace":
        return True
    return False

def is_enum_namestack(nameStack):
    """Determines if a namestack is an enum namestack"""
    if len(nameStack) == 0:
        return False
    if nameStack[0] == "enum":
        return True
    if len(nameStack) > 1 and nameStack[0] == "typedef" and nameStack[1] == "enum":
        return True
    return False
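# Editorial note, illustrative only:
#   is_namespace(["namespace", "Alpha"])                       -> True
#   is_enum_namestack(["enum", "Color", "{", "RED", "}"])      -> True
#   is_enum_namestack(["typedef", "enum", "{", "A", "}", "t"]) -> True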
class CppParseError(Exception): pass
class CppClass(dict):
    """Takes a name stack and turns it into a class

    Contains the following Keys:
    self['name'] - Name of the class
    self['doxygen'] - Doxygen comments associated with the class if they exist
    self['inherits'] - List of Classes that this one inherits where the values
        are of the form {"access": Anything in supportedAccessSpecifier
                         "class": Name of the class
    self['methods'] - Dictionary where keys are from supportedAccessSpecifier
        and values are lists of CppMethod's
    self['properties'] - Dictionary where keys are from supportedAccessSpecifier
        and values are lists of CppVariable's
    self['enums'] - Dictionary where keys are from supportedAccessSpecifier and
        values are lists of CppEnum's

    An example of how this could look is as follows:
    #self =
    {
        'name': ""
        'inherits':[]
        'methods':
        {
            'public':[],
            'protected':[],
            'private':[]
        },
        'properties':
        {
            'public':[],
            'protected':[],
            'private':[]
        },
        'enums':
        {
            'public':[],
            'protected':[],
            'private':[]
        }
    }
    """
    def __init__(self, nameStack):
        if (debug): print "Class: ", nameStack
        if (len(nameStack) < 2):
            print "Error detecting class"
            return
        global doxygenCommentCache
        if len(doxygenCommentCache):
            self["doxygen"] = doxygenCommentCache
            doxygenCommentCache = ""
        self["name"] = nameStack[1]
        inheritList = []
        if ":" in nameStack:
            nameStack = nameStack[nameStack.index(":") + 1:]
            while len(nameStack):
                tmpStack = []
                tmpInheritClass = {"access":"private"}
                if "," in nameStack:
                    tmpStack = nameStack[:nameStack.index(",")]
                    nameStack = nameStack[nameStack.index(",") + 1:]
                else:
                    tmpStack = nameStack
                    nameStack = []
                if len(tmpStack) == 0:
                    break;
                elif len(tmpStack) == 1:
                    tmpInheritClass["class"] = tmpStack[0]
                elif len(tmpStack) == 2:
                    tmpInheritClass["access"] = tmpStack[0]
                    tmpInheritClass["class"] = tmpStack[1]
                else:
                    print "Warning: Cant figure out class inheriting %s\n"%(" ".join(tmpStack))
                    continue
                inheritList.append(tmpInheritClass)
        methodAccessSpecificList = {}
        propertyAccessSpecificList = {}
        enumAccessSpecificList = {}
        for accessSpecifier in supportedAccessSpecifier:
            methodAccessSpecificList[accessSpecifier] = []
            propertyAccessSpecificList[accessSpecifier] = []
            enumAccessSpecificList[accessSpecifier] = []
        self['inherits'] = inheritList
        self['methods'] = methodAccessSpecificList
        self['properties'] = propertyAccessSpecificList
        self['enums'] = enumAccessSpecificList
        self['namespace'] = ""

    def __repr__(self):
        """Convert class to a string"""
        namespace_prefix = ""
        if self["namespace"]: namespace_prefix = self["namespace"] + "::"
        rtn = "class %s\n"%(namespace_prefix + self["name"])
        try:
            print self["doxygen"],
        except: pass
        if "inherits" in self.keys():
            rtn += "Inherits: "
            for inheritClass in self["inherits"]:
                rtn += "%s %s, "%(inheritClass["access"], inheritClass["class"])
            rtn += "\n"
        rtn += "{\n"
        for accessSpecifier in supportedAccessSpecifier:
            rtn += "%s\n"%(accessSpecifier)
            #Enums
            if (len(self["enums"][accessSpecifier])):
                rtn += " // Enums\n"
                for enum in self["enums"][accessSpecifier]:
                    rtn += " %s\n"%(repr(enum))
            #Properties
            if (len(self["properties"][accessSpecifier])):
                rtn += " // Properties\n"
                for property in self["properties"][accessSpecifier]:
                    rtn += " %s\n"%(repr(property))
            #Methods
            if (len(self["methods"][accessSpecifier])):
                rtn += " // Method\n"
                for method in self["methods"][accessSpecifier]:
                    rtn += " %s\n"%(repr(method))
        rtn += "}\n"
        return rtn
class CppMethod(dict):
    """Takes a name stack and turns it into a method

    Contains the following Keys:
    self['rtnType'] - Return type of the method (ex. "int")
    self['name'] - Name of the method (ex. "getSize")
    self['doxygen'] - Doxygen comments associated with the method if they exist
    self['parameters'] - List of CppVariables
    """
    def __init__(self, nameStack, curClass):
        if (debug): print "Method: ", nameStack
        global doxygenCommentCache
        if len(doxygenCommentCache):
            self["doxygen"] = doxygenCommentCache
            doxygenCommentCache = ""
        if "operator" in nameStack:
            self["rtnType"] = " ".join(nameStack[:nameStack.index('operator')])
            self["name"] = "".join(nameStack[nameStack.index('operator'):nameStack.index('(')])
        else:
            self["rtnType"] = " ".join(nameStack[:nameStack.index('(') - 1])
            self["name"] = " ".join(nameStack[nameStack.index('(') - 1:nameStack.index('(')])
        if len(self["rtnType"]) == 0 or self["name"] == curClass:
            self["rtnType"] = "void"
        paramsStack = nameStack[nameStack.index('(') + 1: ]
        #Remove things from the stack till we hit the last paren, this helps handle abstract and normal methods
        while (paramsStack[-1] != ")"):
            paramsStack.pop()
        paramsStack.pop()
        params = []
        #See if there is a doxygen comment for the variable
        doxyVarDesc = {}
        #TODO: Put this into a class
        if self.has_key("doxygen"):
            doxyLines = self["doxygen"].split("\n")
            lastParamDesc = ""
            for doxyLine in doxyLines:
                if " @param " in doxyLine or " \param " in doxyLine:
                    try:
                        #Strip out the param
                        doxyLine = doxyLine[doxyLine.find("param ") + 6:]
                        (var, desc) = doxyLine.split(" ", 1)
                        doxyVarDesc[var] = desc.strip()
                        lastParamDesc = var
                    except: pass
                elif " @return " in doxyLine or " \return " in doxyLine:
                    lastParamDesc = ""
                    # not handled for now
                elif lastParamDesc:
                    try:
                        doxyLine = doxyLine.strip()
                        if " " not in doxyLine:
                            lastParamDesc = ""
                            continue
                        doxyLine = doxyLine[doxyLine.find(" ") + 1:]
                        doxyVarDesc[lastParamDesc] += " " + doxyLine
                    except: pass
        #Create the variable now
        while (len(paramsStack)):
            if (',' in paramsStack):
                params.append(CppVariable(paramsStack[0:paramsStack.index(',')], doxyVarDesc=doxyVarDesc))
                paramsStack = paramsStack[paramsStack.index(',') + 1:]
            else:
                param = CppVariable(paramsStack, doxyVarDesc=doxyVarDesc)
                if len(param.keys()):
                    params.append(param)
                break
        self["parameters"] = params
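# Editorial note: for a doxygen block containing " @param v1 Variable 1",
# the loop above should yield doxyVarDesc == {"v1": "Variable 1"}, which
# CppVariable below surfaces as self["desc"].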
class CppVariable(dict):
    """Takes a name stack and turns it into a variable

    Contains the following Keys:
    self['type'] - Type for the variable (ex. "const string &")
    self['name'] - Name of the variable (ex. "numItems")
    self['namespace'] - Namespace containing the enum
    self['desc'] - Description of the variable if part of a method (optional)
    self['doxygen'] - Doxygen comments associated with the method if they exist
    self['defaltValue'] - Default value of the variable, this key will only
        exist if there is a default value
    """
    def __init__(self, nameStack, **kwargs):
        if (debug): print "Variable: ", nameStack
        if (len(nameStack) < 2):
            return
        global doxygenCommentCache
        if len(doxygenCommentCache):
            self["doxygen"] = doxygenCommentCache
            doxygenCommentCache = ""
        if ("=" in nameStack):
            self["type"] = " ".join(nameStack[:nameStack.index("=") - 1])
            self["name"] = nameStack[nameStack.index("=") - 1]
            self["defaltValue"] = " ".join(nameStack[nameStack.index("=") + 1:])
        else:
            self["type"] = " ".join(nameStack[:-1])
            self["name"] = nameStack[-1]
        self["type"] = self["type"].replace(" :",":")
        self["type"] = self["type"].replace(": ",":")
        self["type"] = self["type"].replace(" <","<")
        self["type"] = self["type"].replace(" >",">")
        #Optional doxygen description
        try:
            self["desc"] = kwargs["doxyVarDesc"][self["name"]]
        except: pass
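# Editorial note: a parameter written "vector<string> & v2" reaches here
# roughly as ["vector", "<string", ">", "&", "v2"] (see t_NAME above), and
# the replace() calls collapse the joined type to "vector<string> &".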
class CppEnum(dict):
    """Takes a name stack and turns it into an Enum

    Contains the following Keys:
    self['name'] - Name of the enum (ex. "ItemState")
    self['namespace'] - Namespace containing the enum
    self['values'] - List of values where the values are a dictionary of the
        form {"name": name of the key (ex. "PARSING_HEADER"),
              "value": Specified value of the enum, this key will only exist
                       if a value for a given enum value was defined
             }
    """
    def __init__(self, nameStack):
        if len(nameStack) < 4 or "{" not in nameStack or "}" not in nameStack:
            #Not enough stuff for an enum
            return
        global doxygenCommentCache
        if len(doxygenCommentCache):
            self["doxygen"] = doxygenCommentCache
            doxygenCommentCache = ""
        valueList = []
        #Figure out what values it has
        valueStack = nameStack[nameStack.index('{') + 1: nameStack.index('}')]
        while len(valueStack):
            tmpStack = []
            if "," in valueStack:
                tmpStack = valueStack[:valueStack.index(",")]
                valueStack = valueStack[valueStack.index(",") + 1:]
            else:
                tmpStack = valueStack
                valueStack = []
            if len(tmpStack) == 1:
                valueList.append({"name": tmpStack[0]})
            elif len(tmpStack) >= 3 and tmpStack[1] == "=":
                valueList.append({"name": tmpStack[0], "value": " ".join(tmpStack[2:])})
            elif len(tmpStack) == 2 and tmpStack[1] == "=":
                if (debug): print "Missed value for %s"%tmpStack[0]
                valueList.append({"name": tmpStack[0]})
        if len(valueList):
            self["values"] = valueList
        else:
            #An enum without any values is useless, dont bother existing
            return
        #Figure out if it has a name
        preBraceStack = nameStack[:nameStack.index("{")]
        postBraceStack = nameStack[nameStack.index("}") + 1:]
        if (len(preBraceStack) == 2 and "typedef" not in nameStack):
            self["name"] = preBraceStack[1]
        elif len(postBraceStack) and "typedef" in nameStack:
            self["name"] = " ".join(postBraceStack)
        #See if there are instances of this
        if "typedef" not in nameStack and len(postBraceStack):
            self["instances"] = []
            for var in postBraceStack:
                if "," in var:
                    continue
                self["instances"].append(var)
        self["namespace"] = ""
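# Editorial note: "enum Color { RED, GREEN = 5 };" should produce
# {"name": "Color", "values": [{"name": "RED"},
#                              {"name": "GREEN", "value": "5"}]}.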
class CppHeader:
    """Parsed C++ class header

    Variables produced:
    self.classes - Dictionary of classes found in a given header file where the
        key is the name of the class
    """
    def __init__(self, headerFileName, argType = "file"):
        if (argType == "file"):
            self.headerFileName = os.path.expandvars(headerFileName)
            self.mainClass = os.path.split(self.headerFileName)[1][:-2]
            headerFileStr = ""
            # if headerFileName[-2:] != ".h":
            #     raise Exception("file must be a header file and end with .h")
        elif argType == "string":
            self.headerFileName = ""
            self.mainClass = "???"
            headerFileStr = headerFileName
        else:
            raise Exception("Arg type must be either file or string")
        self.curClass = ""
        self.classes = {}
        self.enums = []
        self.nameStack = []
        self.nameSpaces = []
        self.curAccessSpecifier = 'private'
        if (len(self.headerFileName)):
            headerFileStr = "\n".join(open(self.headerFileName).readlines())
        self.braceDepth = 0
        lex.input(headerFileStr)
        curLine = 0
        curChar = 0
        try:
            while True:
                tok = lex.token()
                # Example: LexToken(COLON,';',1,373)
                # i.e. (tok.type, tok.value, tok.lineno, tok.lexpos)
                if not tok:
                    break
                curLine = tok.lineno
                curChar = tok.lexpos
                if (tok.type == 'OPEN_BRACE'):
                    if len(self.nameStack) and is_namespace(self.nameStack):
                        self.nameSpaces.append(self.nameStack[1])
                    if len(self.nameStack) and not is_enum_namestack(self.nameStack):
                        self.evaluate_stack()
                    else:
                        self.nameStack.append(tok.value)
                    self.braceDepth += 1
                elif (tok.type == 'CLOSE_BRACE'):
                    if self.braceDepth == 0:
                        continue
                    if (self.braceDepth == len(self.nameSpaces)):
                        tmp = self.nameSpaces.pop()
                    if len(self.nameStack) and is_enum_namestack(self.nameStack):
                        self.nameStack.append(tok.value)
                    elif self.braceDepth < 10:
                        self.evaluate_stack()
                    else:
                        self.nameStack = []
                    self.braceDepth -= 1
                    if (self.braceDepth == 0):
                        self.curClass = ""
                if (tok.type == 'OPEN_PAREN'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'CLOSE_PAREN'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'EQUALS'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'COMMA'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'NUMBER'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'MINUS'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'PLUS'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'STRING_LITERAL'):
                    self.nameStack.append(tok.value)
                elif (tok.type == 'NAME' or tok.type == 'AMPERSTAND' or tok.type == 'ASTERISK'):
                    if (tok.value == 'class'):
                        self.nameStack.append(tok.value)
                    elif (tok.value in supportedAccessSpecifier and self.braceDepth == len(self.nameSpaces) + 1):
                        self.curAccessSpecifier = tok.value
                    else:
                        self.nameStack.append(tok.value)
                elif (tok.type == 'COLON'):
                    #Dont want colon to be first in stack
                    if len(self.nameStack) == 0:
                        continue
                    self.nameStack.append(tok.value)
                elif (tok.type == 'SEMI_COLON'):
                    if (self.braceDepth < 10):
                        self.evaluate_stack()
        except:
            raise CppParseError("Not able to parse %s on line %d evaluating \"%s\"\nError around: %s"
                                % (self.headerFileName, tok.lineno, tok.value, " ".join(self.nameStack)))
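    # Editorial note: while lexing "namespace Alpha { class AlphaClass { ... } }",
    # braceDepth and nameSpaces track nesting so that access specifiers are only
    # honoured at class scope (braceDepth == len(self.nameSpaces) + 1).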
    def evaluate_stack(self):
        """Evaluates the current name stack"""
        global doxygenCommentCache
        if (debug): print "Evaluating stack %s at..."%self.nameStack
        if (len(self.curClass)):
            if (debug): print "%s (%s) "%(self.curClass, self.curAccessSpecifier),
        if (len(self.nameStack) == 0):
            if (debug): print "line ",lineno()
            if (debug): print "(Empty Stack)"
            return
        elif (self.nameStack[0] == "namespace"):
            #Taken care of outside of here
            pass
        elif (self.nameStack[0] == "class"):
            if (debug): print "line ",lineno()
            self.evaluate_class_stack()
        elif (self.nameStack[0] == "struct"):
            if (debug): print "line ",lineno()
            self.curAccessSpecifier = "public"
            self.evaluate_class_stack()
        elif (len(self.curClass) == 0):
            if (debug): print "line ",lineno()
            if is_enum_namestack(self.nameStack):
                self.evaluate_enum_stack()
            self.nameStack = []
            doxygenCommentCache = ""
            return
        elif (self.braceDepth < 1):
            if (debug): print "line ",lineno()
            #Ignore global stuff for now
            if (debug): print "Global stuff: ", self.nameStack
            self.nameStack = []
            doxygenCommentCache = ""
            return
        elif (self.braceDepth > len(self.nameSpaces) + 1):
            if (debug): print "line ",lineno()
            self.nameStack = []
            doxygenCommentCache = ""
            return
        elif is_enum_namestack(self.nameStack):
            if (debug): print "line ",lineno()
            #elif self.nameStack[0] == "enum":
            self.evaluate_enum_stack()
        elif ('(' in self.nameStack):
            if (debug): print "line ",lineno()
            self.evaluate_method_stack()
        else:
            if (debug): print "line ",lineno()
            self.evaluate_property_stack()
        self.nameStack = []
        doxygenCommentCache = ""
    def evaluate_class_stack(self):
        """Create a Class out of the name stack (but not its parts)"""
        #dont support sub classes today
        if self.braceDepth != len(self.nameSpaces):
            return
        newClass = CppClass(self.nameStack)
        if len(newClass.keys()):
            self.curClass = newClass["name"]
            self.classes[self.curClass] = newClass
        else:
            self.curClass = ""
        newClass["namespace"] = self.cur_namespace()

    def evaluate_method_stack(self):
        """Create a method out of the name stack"""
        newMethod = CppMethod(self.nameStack, self.curClass)
        if len(newMethod.keys()):
            self.classes[self.curClass]["methods"][self.curAccessSpecifier].append(newMethod)

    def evaluate_property_stack(self):
        """Create a Property out of the name stack"""
        newVar = CppVariable(self.nameStack)
        if len(newVar.keys()):
            self.classes[self.curClass]["properties"][self.curAccessSpecifier].append(newVar)

    def evaluate_enum_stack(self):
        """Create an Enum out of the name stack"""
        newEnum = CppEnum(self.nameStack)
        if len(newEnum.keys()):
            if len(self.curClass):
                newEnum["namespace"] = self.cur_namespace()
                self.classes[self.curClass]["enums"][self.curAccessSpecifier].append(newEnum)
            else:
                newEnum["namespace"] = self.cur_namespace()
                # print "Adding global enum"
                self.enums.append(newEnum)
            #This enum has instances, turn them into properties
            if newEnum.has_key("instances"):
                instanceType = "enum"
                if newEnum.has_key("name"):
                    instanceType = newEnum["name"]
                for instance in newEnum["instances"]:
                    self.nameStack = [instanceType, instance]
                    self.evaluate_property_stack()
                del newEnum["instances"]
    def cur_namespace(self, add_double_colon = False):
        rtn = ""
        i = 0
        while i < len(self.nameSpaces):
            rtn += self.nameSpaces[i]
            if add_double_colon or i < len(self.nameSpaces) - 1:
                rtn += "::"
            i += 1
        return rtn
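    # Editorial note: with self.nameSpaces == ["Alpha", "Omega"],
    # cur_namespace() returns "Alpha::Omega" and
    # cur_namespace(True) returns "Alpha::Omega::".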
    def __repr__(self):
        rtn = ""
        for className in self.classes.keys():
            rtn += repr(self.classes[className])
        return rtn


@@ -0,0 +1,12 @@
#!/usr/bin/python
import CppHeaderParser
f = CppHeaderParser.CppHeader("test/TestSampleClass.h")
print f
print "=" * 20
#print f.classes["SampleClass"]["methods"]["public"][2]["parameters"]
print f.classes["AlphaClass"]["enums"]["protected"][0]["values"]

cppParser/PKG-INFO (normal file, 290 lines added)

@@ -0,0 +1,290 @@
Metadata-Version: 1.1
Name: CppHeaderParser
Version: 2.4
Summary: Parse C++ header files and generate a data structure representing the class
Home-page: http://senexcanis.com/open-source/cppheaderparser/
Author: Jashua Cloutier
Author-email: jashuac@bellsouth.net
License: BSD
Description: Python package "CppHeaderParser"
--------------------------------
**Purpose:** Parse C++ header files and generate a data structure representing the class
**Author:** Jashua Cloutier
**Licence:** BSD
**External modules required:** PLY
**Quick start**::
#include <vector>
#include <string>

#define DEF_1 1
#define OS_NAME "Linux"

using namespace std;
class SampleClass
{
public:
    SampleClass();
    /*!
     * Method 1
     */
    string meth1();

    ///
    /// Method 2 description
    ///
    /// @param v1 Variable 1
    ///
    int meth2(int v1);

    /**
     * Method 3 description
     *
     * \param v1 Variable 1
     * \param v2 Variable 2
     */
    void meth3(const string & v1, vector<string> & v2);

    /**********************************
     * Method 4 description
     *
     * @return Return value
     *********************************/
    unsigned int meth4();
private:
    void * meth5(){return NULL};

    /// prop1 description
    string prop1;
    //! prop5 description
    int prop5;
};
namespace Alpha
{
    class AlphaClass
    {
    public:
        AlphaClass();

        void alphaMethod();

        string alphaString;
    };

    namespace Omega
    {
        class OmegaClass
        {
        public:
            OmegaClass();

            string omegaString;
        };
    };
}

int sampleFreeFunction(int i)
{
    return i + 1;
}

int anotherFreeFunction(void);
}
**Python code**::
#!/usr/bin/python
import sys
sys.path = ["../"] + sys.path
import CppHeaderParser
try:
    cppHeader = CppHeaderParser.CppHeader("SampleClass.h")
except CppHeaderParser.CppParseError, e:
    print e
    sys.exit(1)
print "CppHeaderParser view of %s"%cppHeader
sampleClass = cppHeader.classes["SampleClass"]
print "Number of public methods %d"%(len(sampleClass["methods"]["public"]))
print "Number of private properties %d"%(len(sampleClass["properties"]["private"]))
meth3 = [m for m in sampleClass["methods"]["public"] if m["name"] == "meth3"][0] #get meth3
meth3ParamTypes = [t["type"] for t in meth3["parameters"]] #get meth3s parameters
print "Parameter Types for public method meth3 %s"%(meth3ParamTypes)
print "\nReturn type for meth1:"
print cppHeader.classes["SampleClass"]["methods"]["public"][1]["rtnType"]
print "\nDoxygen for meth2:"
print cppHeader.classes["SampleClass"]["methods"]["public"][2]["doxygen"]
print "\nParameters for meth3:"
print cppHeader.classes["SampleClass"]["methods"]["public"][3]["parameters"]
print "\nDoxygen for meth4:"
print cppHeader.classes["SampleClass"]["methods"]["public"][4]["doxygen"]
print "\nReturn type for meth5:"
print cppHeader.classes["SampleClass"]["methods"]["private"][0]["rtnType"]
print "\nDoxygen type for prop1:"
print cppHeader.classes["SampleClass"]["properties"]["private"][0]["doxygen"]
print "\nType for prop5:"
print cppHeader.classes["SampleClass"]["properties"]["private"][1]["type"]
print "\nNamespace for AlphaClass is:"
print cppHeader.classes["AlphaClass"]["namespace"]
print "\nReturn type for alphaMethod is:"
print cppHeader.classes["AlphaClass"]["methods"]["public"][0]["rtnType"]
print "\nNamespace for OmegaClass is:"
print cppHeader.classes["OmegaClass"]["namespace"]
print "\nType for omegaString is:"
print cppHeader.classes["AlphaClass"]["properties"]["public"][0]["type"]
print "\nFree functions are:"
for func in cppHeader.functions:
    print " %s"%func["name"]
print "\n#includes are:"
for incl in cppHeader.includes:
    print " %s"%incl
print "\n#defines are:"
for define in cppHeader.defines:
    print " %s"%define
**Output**::
CppHeaderParser view of class SampleClass
{
public
// Methods
{'line_number': 11, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 0, 'class': None, 'name': 'SampleClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'SampleClass ( ) ;', 'inline': False}
{'line_number': 15, 'static': False, 'rtnType': 'string', 'returns_unknown': True, 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'string', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/*!\n* Method 1\n*/', 'name': 'meth1', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': False, 'constructor': False, 'debug': 'string meth1 ( ) ;', 'returns_pointer': 0}
{'line_number': 22, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 22, 'constant': 0, 'name': 'v1', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '///\n/// Method 2 description\n///\n/// @param v1 Variable 1\n///', 'name': 'meth2', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int meth2 ( int v1 ) ;', 'returns_pointer': 0}
{'line_number': 30, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector<string> &', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'unresolved_parameters': True, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**\n* Method 3 description\n*\n* \\param v1 Variable 1\n* \\param v2 Variable 2\n*/', 'name': 'meth3', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void meth3 ( const string & v1 , vector <string> & v2 ) ;', 'returns_pointer': 0}
{'line_number': 37, 'static': False, 'rtnType': 'unsigned int', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'unsigned int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**********************************\n* Method 4 description\n*\n* @return Return value\n*********************************/', 'name': 'meth4', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'unsigned int meth4 ( ) ;', 'returns_pointer': 0}
protected
private
// Properties
{'line_number': 42, 'constant': 0, 'name': 'prop1', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
{'line_number': 44, 'constant': 0, 'name': 'prop5', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 39, 'static': False, 'rtnType': 'void *', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 1, 'class': None, 'name': 'meth5', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void * meth5 ( ) {', 'inline': False}
}
class Alpha::AlphaClass
{
public
// Properties
{'line_number': 55, 'constant': 0, 'name': 'alphaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 51, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'AlphaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'AlphaClass ( ) ;', 'inline': False}
{'line_number': 53, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'alphaMethod', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void alphaMethod ( ) ;', 'inline': False}
protected
private
}
class Alpha::Omega::OmegaClass
{
public
// Properties
{'line_number': 65, 'constant': 0, 'name': 'omegaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 63, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::Omega::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::Omega::OmegaClass', 'returns_pointer': 0, 'class': None, 'name': 'OmegaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'OmegaClass ( ) ;', 'inline': False}
protected
private
}
// functions
{'line_number': 70, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 70, 'constant': 0, 'name': 'i', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'sampleFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int sampleFreeFunction ( int i ) {', 'inline': False}
{'line_number': 75, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 75, 'constant': 0, 'name': '', 'reference': 0, 'type': 'void', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'anotherFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int anotherFreeFunction ( void ) ;', 'inline': False}
Number of public methods 5
Number of private properties 2
Parameter Types for public method meth3 ['const string &', 'vector<string> &']
Return type for meth1:
string
Doxygen for meth2:
///
/// Method 2 description
///
/// @param v1 Variable 1
///
Parameters for meth3:
[{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector<string> &', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}]
Doxygen for meth4:
/**********************************
* Method 4 description
*
* @return Return value
*********************************/
Return type for meth5:
void *
Doxygen type for prop1:
/// prop1 description
Type for prop5:
int
Namespace for AlphaClass is:
Alpha
Return type for alphaMethod is:
void
Namespace for OmegaClass is:
Alpha::Omega
Type for omegaString is:
string
Free functions are:
sampleFreeFunction
anotherFreeFunction
#includes are:
<vector>
<string>
#defines are:
DEF_1 1
OS_NAME "Linux"
Contributors
------------
* Chris Love
* HartsAntler
Keywords: c++ header parser ply
Platform: Platform Independent
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: C++
Classifier: License :: OSI Approved :: BSD License
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development
Classifier: Topic :: Software Development :: Code Generators
Classifier: Topic :: Software Development :: Compilers
Classifier: Topic :: Software Development :: Disassemblers
Requires: ply

cppParser/README.html (normal file, 598 lines added)

@@ -0,0 +1,598 @@
<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Docutils 0.9.1: http://docutils.sourceforge.net/" />
<title></title>
<style type="text/css">
/*
:Author: David Goodger (goodger@python.org)
:Id: $Id: html4css1.css 7434 2012-05-11 21:06:27Z milde $
:Copyright: This stylesheet has been placed in the public domain.
Default cascading style sheet for the HTML output of Docutils.
See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
customize this style sheet.
*/
/* used to remove borders from tables and images */
.borderless, table.borderless td, table.borderless th {
border: 0 }
table.borderless td, table.borderless th {
/* Override padding for "table.docutils td" with "! important".
The right padding separates the table cells. */
padding: 0 0.5em 0 0 ! important }
.first {
/* Override more specific margin styles with "! important". */
margin-top: 0 ! important }
.last, .with-subtitle {
margin-bottom: 0 ! important }
.hidden {
display: none }
a.toc-backref {
text-decoration: none ;
color: black }
blockquote.epigraph {
margin: 2em 5em ; }
dl.docutils dd {
margin-bottom: 0.5em }
object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] {
overflow: hidden;
}
/* Uncomment (and remove this text!) to get bold-faced definition list terms
dl.docutils dt {
font-weight: bold }
*/
div.abstract {
margin: 2em 5em }
div.abstract p.topic-title {
font-weight: bold ;
text-align: center }
div.admonition, div.attention, div.caution, div.danger, div.error,
div.hint, div.important, div.note, div.tip, div.warning {
margin: 2em ;
border: medium outset ;
padding: 1em }
div.admonition p.admonition-title, div.hint p.admonition-title,
div.important p.admonition-title, div.note p.admonition-title,
div.tip p.admonition-title {
font-weight: bold ;
font-family: sans-serif }
div.attention p.admonition-title, div.caution p.admonition-title,
div.danger p.admonition-title, div.error p.admonition-title,
div.warning p.admonition-title {
color: red ;
font-weight: bold ;
font-family: sans-serif }
/* Uncomment (and remove this text!) to get reduced vertical space in
compound paragraphs.
div.compound .compound-first, div.compound .compound-middle {
margin-bottom: 0.5em }
div.compound .compound-last, div.compound .compound-middle {
margin-top: 0.5em }
*/
div.dedication {
margin: 2em 5em ;
text-align: center ;
font-style: italic }
div.dedication p.topic-title {
font-weight: bold ;
font-style: normal }
div.figure {
margin-left: 2em ;
margin-right: 2em }
div.footer, div.header {
clear: both;
font-size: smaller }
div.line-block {
display: block ;
margin-top: 1em ;
margin-bottom: 1em }
div.line-block div.line-block {
margin-top: 0 ;
margin-bottom: 0 ;
margin-left: 1.5em }
div.sidebar {
margin: 0 0 0.5em 1em ;
border: medium outset ;
padding: 1em ;
background-color: #ffffee ;
width: 40% ;
float: right ;
clear: right }
div.sidebar p.rubric {
font-family: sans-serif ;
font-size: medium }
div.system-messages {
margin: 5em }
div.system-messages h1 {
color: red }
div.system-message {
border: medium outset ;
padding: 1em }
div.system-message p.system-message-title {
color: red ;
font-weight: bold }
div.topic {
margin: 2em }
h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
margin-top: 0.4em }
h1.title {
text-align: center }
h2.subtitle {
text-align: center }
hr.docutils {
width: 75% }
img.align-left, .figure.align-left, object.align-left {
clear: left ;
float: left ;
margin-right: 1em }
img.align-right, .figure.align-right, object.align-right {
clear: right ;
float: right ;
margin-left: 1em }
img.align-center, .figure.align-center, object.align-center {
display: block;
margin-left: auto;
margin-right: auto;
}
.align-left {
text-align: left }
.align-center {
clear: both ;
text-align: center }
.align-right {
text-align: right }
/* reset inner alignment in figures */
div.align-right {
text-align: inherit }
/* div.align-center * { */
/* text-align: left } */
ol.simple, ul.simple {
margin-bottom: 1em }
ol.arabic {
list-style: decimal }
ol.loweralpha {
list-style: lower-alpha }
ol.upperalpha {
list-style: upper-alpha }
ol.lowerroman {
list-style: lower-roman }
ol.upperroman {
list-style: upper-roman }
p.attribution {
text-align: right ;
margin-left: 50% }
p.caption {
font-style: italic }
p.credits {
font-style: italic ;
font-size: smaller }
p.label {
white-space: nowrap }
p.rubric {
font-weight: bold ;
font-size: larger ;
color: maroon ;
text-align: center }
p.sidebar-title {
font-family: sans-serif ;
font-weight: bold ;
font-size: larger }
p.sidebar-subtitle {
font-family: sans-serif ;
font-weight: bold }
p.topic-title {
font-weight: bold }
pre.address {
margin-bottom: 0 ;
margin-top: 0 ;
font: inherit }
pre.literal-block, pre.doctest-block, pre.math, pre.code {
margin-left: 2em ;
margin-right: 2em }
pre.code .ln { /* line numbers */
color: grey;
}
.code {
background-color: #eeeeee
}
span.classifier {
font-family: sans-serif ;
font-style: oblique }
span.classifier-delimiter {
font-family: sans-serif ;
font-weight: bold }
span.interpreted {
font-family: sans-serif }
span.option {
white-space: nowrap }
span.pre {
white-space: pre }
span.problematic {
color: red }
span.section-subtitle {
/* font-size relative to parent (h1..h6 element) */
font-size: 80% }
table.citation {
border-left: solid 1px gray;
margin-left: 1px }
table.docinfo {
margin: 2em 4em }
table.docutils {
margin-top: 0.5em ;
margin-bottom: 0.5em }
table.footnote {
border-left: solid 1px black;
margin-left: 1px }
table.docutils td, table.docutils th,
table.docinfo td, table.docinfo th {
padding-left: 0.5em ;
padding-right: 0.5em ;
vertical-align: top }
table.docutils th.field-name, table.docinfo th.docinfo-name {
font-weight: bold ;
text-align: left ;
white-space: nowrap ;
padding-left: 0 }
h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
font-size: 100% }
ul.auto-toc {
list-style-type: none }
/*customization*/
pre.literal-block{
color: #6A6A6A;
}
</style>
</head>
<body>
<div class="document">
<div class="section" id="python-package-cppheaderparser">
<h1>Python package &quot;CppHeaderParser&quot;</h1>
<p><strong>Purpose:</strong> Parse C++ header files and generate a data structure representing the class</p>
<p><strong>Author:</strong> Jashua Cloutier</p>
<p><strong>Licence:</strong> BSD</p>
<p><strong>External modules required:</strong> PLY</p>
<p><strong>Quick start</strong>:</p>
<pre class="literal-block" width="1200px" style="max-width: 1200px">
#include &lt;vector&gt;
#include &lt;string&gt;

#define DEF_1 1
#define OS_NAME &quot;Linux&quot;

using namespace std;
class SampleClass
{
public:
    SampleClass();
    /*!
     * Method 1
     */
    string meth1();

    ///
    /// Method 2 description
    ///
    /// &#64;param v1 Variable 1
    ///
    int meth2(int v1);

    /**
     * Method 3 description
     *
     * \param v1 Variable 1
     * \param v2 Variable 2
     */
    void meth3(const string &amp; v1, vector&lt;string&gt; &amp; v2);

    /**********************************
     * Method 4 description
     *
     * &#64;return Return value
     *********************************/
    unsigned int meth4();
private:
    void * meth5(){return NULL};

    /// prop1 description
    string prop1;
    //! prop5 description
    int prop5;
};
namespace Alpha
{
    class AlphaClass
    {
    public:
        AlphaClass();

        void alphaMethod();

        string alphaString;
    };

    namespace Omega
    {
        class OmegaClass
        {
        public:
            OmegaClass();

            string omegaString;
        };
    };
}

int sampleFreeFunction(int i)
{
    return i + 1;
}

int anotherFreeFunction(void);
}
</pre>
<p><strong>Python code</strong>:</p>
<pre class="literal-block" width="1200px" style="max-width: 1200px">
#!/usr/bin/python
import sys
sys.path = [&quot;../&quot;] + sys.path
import CppHeaderParser
try:
    cppHeader = CppHeaderParser.CppHeader(&quot;SampleClass.h&quot;)
except CppHeaderParser.CppParseError, e:
    print e
    sys.exit(1)
print &quot;CppHeaderParser view of %s&quot;%cppHeader
sampleClass = cppHeader.classes[&quot;SampleClass&quot;]
print &quot;Number of public methods %d&quot;%(len(sampleClass[&quot;methods&quot;][&quot;public&quot;]))
print &quot;Number of private properties %d&quot;%(len(sampleClass[&quot;properties&quot;][&quot;private&quot;]))
meth3 = [m for m in sampleClass[&quot;methods&quot;][&quot;public&quot;] if m[&quot;name&quot;] == &quot;meth3&quot;][0] #get meth3
meth3ParamTypes = [t[&quot;type&quot;] for t in meth3[&quot;parameters&quot;]] #get meth3s parameters
print &quot;Parameter Types for public method meth3 %s&quot;%(meth3ParamTypes)
print &quot;\nReturn type for meth1:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;methods&quot;][&quot;public&quot;][1][&quot;rtnType&quot;]
print &quot;\nDoxygen for meth2:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;methods&quot;][&quot;public&quot;][2][&quot;doxygen&quot;]
print &quot;\nParameters for meth3:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;methods&quot;][&quot;public&quot;][3][&quot;parameters&quot;]
print &quot;\nDoxygen for meth4:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;methods&quot;][&quot;public&quot;][4][&quot;doxygen&quot;]
print &quot;\nReturn type for meth5:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;methods&quot;][&quot;private&quot;][0][&quot;rtnType&quot;]
print &quot;\nDoxygen type for prop1:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;properties&quot;][&quot;private&quot;][0][&quot;doxygen&quot;]
print &quot;\nType for prop5:&quot;
print cppHeader.classes[&quot;SampleClass&quot;][&quot;properties&quot;][&quot;private&quot;][1][&quot;type&quot;]
print &quot;\nNamespace for AlphaClass is:&quot;
print cppHeader.classes[&quot;AlphaClass&quot;][&quot;namespace&quot;]
print &quot;\nReturn type for alphaMethod is:&quot;
print cppHeader.classes[&quot;AlphaClass&quot;][&quot;methods&quot;][&quot;public&quot;][0][&quot;rtnType&quot;]
print &quot;\nNamespace for OmegaClass is:&quot;
print cppHeader.classes[&quot;OmegaClass&quot;][&quot;namespace&quot;]
print &quot;\nType for omegaString is:&quot;
print cppHeader.classes[&quot;AlphaClass&quot;][&quot;properties&quot;][&quot;public&quot;][0][&quot;type&quot;]
print &quot;\nFree functions are:&quot;
for func in cppHeader.functions:
    print &quot; %s&quot;%func[&quot;name&quot;]
print &quot;\n#includes are:&quot;
for incl in cppHeader.includes:
    print &quot; %s&quot;%incl
print &quot;\n#defines are:&quot;
for define in cppHeader.defines:
    print &quot; %s&quot;%define
</pre>
<p><strong>Output</strong>:</p>
<pre class="literal-block" width="1200px" style="max-width: 1200px">
CppHeaderParser view of class SampleClass
{
public
// Methods
{'line_number': 11, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 0, 'class': None, 'name': 'SampleClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'SampleClass ( ) ;', 'inline': False}
{'line_number': 15, 'static': False, 'rtnType': 'string', 'returns_unknown': True, 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'string', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/*!\n* Method 1\n*/', 'name': 'meth1', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': False, 'constructor': False, 'debug': 'string meth1 ( ) ;', 'returns_pointer': 0}
{'line_number': 22, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 22, 'constant': 0, 'name': 'v1', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '///\n/// Method 2 description\n///\n/// &#64;param v1 Variable 1\n///', 'name': 'meth2', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int meth2 ( int v1 ) ;', 'returns_pointer': 0}
{'line_number': 30, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &amp;', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector&lt;string&gt; &amp;', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'unresolved_parameters': True, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**\n* Method 3 description\n*\n* \\param v1 Variable 1\n* \\param v2 Variable 2\n*/', 'name': 'meth3', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void meth3 ( const string &amp; v1 , vector &lt;string&gt; &amp; v2 ) ;', 'returns_pointer': 0}
{'line_number': 37, 'static': False, 'rtnType': 'unsigned int', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'unsigned int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**********************************\n* Method 4 description\n*\n* &#64;return Return value\n*********************************/', 'name': 'meth4', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'unsigned int meth4 ( ) ;', 'returns_pointer': 0}
protected
private
// Properties
{'line_number': 42, 'constant': 0, 'name': 'prop1', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
{'line_number': 44, 'constant': 0, 'name': 'prop5', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 39, 'static': False, 'rtnType': 'void *', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 1, 'class': None, 'name': 'meth5', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void * meth5 ( ) {', 'inline': False}
}
class Alpha::AlphaClass
{
public
// Properties
{'line_number': 55, 'constant': 0, 'name': 'alphaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 51, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'AlphaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'AlphaClass ( ) ;', 'inline': False}
{'line_number': 53, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'alphaMethod', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void alphaMethod ( ) ;', 'inline': False}
protected
private
}
class Alpha::Omega::OmegaClass
{
public
// Properties
{'line_number': 65, 'constant': 0, 'name': 'omegaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 63, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::Omega::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::Omega::OmegaClass', 'returns_pointer': 0, 'class': None, 'name': 'OmegaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'OmegaClass ( ) ;', 'inline': False}
protected
private
}
// functions
{'line_number': 70, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 70, 'constant': 0, 'name': 'i', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'sampleFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int sampleFreeFunction ( int i ) {', 'inline': False}
{'line_number': 75, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 75, 'constant': 0, 'name': '', 'reference': 0, 'type': 'void', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'anotherFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int anotherFreeFunction ( void ) ;', 'inline': False}
Number of public methods 5
Number of private properties 2
Parameter Types for public method meth3 ['const string &amp;', 'vector&lt;string&gt; &amp;']
Return type for meth1:
string
Doxygen for meth2:
///
/// Method 2 description
///
/// &#64;param v1 Variable 1
///
Parameters for meth3:
[{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &amp;', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector&lt;string&gt; &amp;', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}]
Doxygen for meth4:
/**********************************
* Method 4 description
*
* &#64;return Return value
*********************************/
Return type for meth5:
void *
Doxygen type for prop1:
/// prop1 description
Type for prop5:
int
Namespace for AlphaClass is:
Alpha
Return type for alphaMethod is:
void
Namespace for OmegaClass is:
Alpha::Omega
Type for omegaString is:
string
Free functions are:
sampleFreeFunction
anotherFreeFunction
#includes are:
&lt;vector&gt;
&lt;string&gt;
#defines are:
DEF_1 1
OS_NAME &quot;Linux&quot;
</pre>
</div>
<div class="section" id="contributors">
<h1>Contributors</h1>
<ul class="simple">
<li>Chris Love</li>
<li>HartsAntler</li>
</ul>
</div>
</div>
</body>
</html>

cppParser/README.txt (normal file, 266 lines added)

@@ -0,0 +1,266 @@
Python package "CppHeaderParser"
--------------------------------
**Purpose:** Parse C++ header files and generate a data structure representing the class
**Author:** Jashua Cloutier
**Licence:** BSD
**External modules required:** PLY
**Quick start**::
#include <vector>
#include <string>

#define DEF_1 1
#define OS_NAME "Linux"

using namespace std;
class SampleClass
{
public:
    SampleClass();
    /*!
     * Method 1
     */
    string meth1();

    ///
    /// Method 2 description
    ///
    /// @param v1 Variable 1
    ///
    int meth2(int v1);

    /**
     * Method 3 description
     *
     * \param v1 Variable 1
     * \param v2 Variable 2
     */
    void meth3(const string & v1, vector<string> & v2);

    /**********************************
     * Method 4 description
     *
     * @return Return value
     *********************************/
    unsigned int meth4();
private:
    void * meth5(){return NULL};

    /// prop1 description
    string prop1;
    //! prop5 description
    int prop5;
};
namespace Alpha
{
    class AlphaClass
    {
    public:
        AlphaClass();

        void alphaMethod();

        string alphaString;
    };

    namespace Omega
    {
        class OmegaClass
        {
        public:
            OmegaClass();

            string omegaString;
        };
    };
}

int sampleFreeFunction(int i)
{
    return i + 1;
}

int anotherFreeFunction(void);
}
**Python code**::
#!/usr/bin/python
import sys
sys.path = ["../"] + sys.path
import CppHeaderParser
try:
    cppHeader = CppHeaderParser.CppHeader("SampleClass.h")
except CppHeaderParser.CppParseError, e:
    print e
    sys.exit(1)
print "CppHeaderParser view of %s"%cppHeader
sampleClass = cppHeader.classes["SampleClass"]
print "Number of public methods %d"%(len(sampleClass["methods"]["public"]))
print "Number of private properties %d"%(len(sampleClass["properties"]["private"]))
meth3 = [m for m in sampleClass["methods"]["public"] if m["name"] == "meth3"][0] #get meth3
meth3ParamTypes = [t["type"] for t in meth3["parameters"]] #get meth3s parameters
print "Parameter Types for public method meth3 %s"%(meth3ParamTypes)
print "\nReturn type for meth1:"
print cppHeader.classes["SampleClass"]["methods"]["public"][1]["rtnType"]
print "\nDoxygen for meth2:"
print cppHeader.classes["SampleClass"]["methods"]["public"][2]["doxygen"]
print "\nParameters for meth3:"
print cppHeader.classes["SampleClass"]["methods"]["public"][3]["parameters"]
print "\nDoxygen for meth4:"
print cppHeader.classes["SampleClass"]["methods"]["public"][4]["doxygen"]
print "\nReturn type for meth5:"
print cppHeader.classes["SampleClass"]["methods"]["private"][0]["rtnType"]
print "\nDoxygen type for prop1:"
print cppHeader.classes["SampleClass"]["properties"]["private"][0]["doxygen"]
print "\nType for prop5:"
print cppHeader.classes["SampleClass"]["properties"]["private"][1]["type"]
print "\nNamespace for AlphaClass is:"
print cppHeader.classes["AlphaClass"]["namespace"]
print "\nReturn type for alphaMethod is:"
print cppHeader.classes["AlphaClass"]["methods"]["public"][0]["rtnType"]
print "\nNamespace for OmegaClass is:"
print cppHeader.classes["OmegaClass"]["namespace"]
print "\nType for omegaString is:"
print cppHeader.classes["AlphaClass"]["properties"]["public"][0]["type"]
print "\nFree functions are:"
for func in cppHeader.functions:
print " %s"%func["name"]
print "\n#includes are:"
for incl in cppHeader.includes:
print " %s"%incl
print "\n#defines are:"
for define in cppHeader.defines:
print " %s"%define
**Output**::
CppHeaderParser view of class SampleClass
{
public
// Methods
{'line_number': 11, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 0, 'class': None, 'name': 'SampleClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'SampleClass ( ) ;', 'inline': False}
{'line_number': 15, 'static': False, 'rtnType': 'string', 'returns_unknown': True, 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'string', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/*!\n* Method 1\n*/', 'name': 'meth1', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': False, 'constructor': False, 'debug': 'string meth1 ( ) ;', 'returns_pointer': 0}
{'line_number': 22, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 22, 'constant': 0, 'name': 'v1', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '///\n/// Method 2 description\n///\n/// @param v1 Variable 1\n///', 'name': 'meth2', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int meth2 ( int v1 ) ;', 'returns_pointer': 0}
{'line_number': 30, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector<string> &', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'unresolved_parameters': True, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**\n* Method 3 description\n*\n* \\param v1 Variable 1\n* \\param v2 Variable 2\n*/', 'name': 'meth3', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void meth3 ( const string & v1 , vector <string> & v2 ) ;', 'returns_pointer': 0}
{'line_number': 37, 'static': False, 'rtnType': 'unsigned int', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'unsigned int', 'template': False, 'friend': False, 'returns_class': False, 'inline': False, 'extern': False, 'path': 'SampleClass', 'class': None, 'doxygen': '/**********************************\n* Method 4 description\n*\n* @return Return value\n*********************************/', 'name': 'meth4', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'unsigned int meth4 ( ) ;', 'returns_pointer': 0}
protected
private
// Properties
{'line_number': 42, 'constant': 0, 'name': 'prop1', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
{'line_number': 44, 'constant': 0, 'name': 'prop5', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 39, 'static': False, 'rtnType': 'void *', 'const': False, 'parameters': [], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'SampleClass', 'returns_pointer': 1, 'class': None, 'name': 'meth5', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void * meth5 ( ) {', 'inline': False}
}
class Alpha::AlphaClass
{
public
// Properties
{'line_number': 55, 'constant': 0, 'name': 'alphaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 51, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'AlphaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'AlphaClass ( ) ;', 'inline': False}
{'line_number': 53, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::', 'virtual': False, 'destructor': False, 'returns': 'void', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::AlphaClass', 'returns_pointer': 0, 'class': None, 'name': 'alphaMethod', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'void alphaMethod ( ) ;', 'inline': False}
protected
private
}
class Alpha::Omega::OmegaClass
{
public
// Properties
{'line_number': 65, 'constant': 0, 'name': 'omegaString', 'reference': 0, 'type': 'string', 'static': 0, 'pointer': 0}
// Methods
{'line_number': 63, 'static': False, 'rtnType': 'void', 'const': False, 'parameters': [], 'namespace': 'Alpha::Omega::', 'virtual': False, 'destructor': False, 'returns': '', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'path': 'Alpha::Omega::OmegaClass', 'returns_pointer': 0, 'class': None, 'name': 'OmegaClass', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': True, 'debug': 'OmegaClass ( ) ;', 'inline': False}
protected
private
}
// functions
{'line_number': 70, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 70, 'constant': 0, 'name': 'i', 'reference': 0, 'type': 'int', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'sampleFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int sampleFreeFunction ( int i ) {', 'inline': False}
{'line_number': 75, 'static': False, 'rtnType': 'int', 'const': False, 'parameters': [{'line_number': 75, 'constant': 0, 'name': '', 'reference': 0, 'type': 'void', 'static': 0, 'pointer': 0}], 'namespace': '', 'virtual': False, 'destructor': False, 'returns': 'int', 'template': False, 'friend': False, 'returns_class': False, 'extern': False, 'returns_pointer': 0, 'class': None, 'name': 'anotherFreeFunction', 'pure_virtual': False, 'explicit': False, 'returns_fundamental': True, 'constructor': False, 'debug': 'int anotherFreeFunction ( void ) ;', 'inline': False}
Number of public methods 5
Number of private properties 2
Parameter Types for public method meth3 ['const string &', 'vector<string> &']
Return type for meth1:
string
Doxygen for meth2:
///
/// Method 2 description
///
/// @param v1 Variable 1
///
Parameters for meth3:
[{'line_number': 30, 'constant': 1, 'name': 'v1', 'reference': 1, 'type': 'const string &', 'static': 0, 'pointer': 0, 'desc': 'Variable 1'}, {'line_number': 30, 'constant': 0, 'name': 'v2', 'reference': 1, 'type': 'vector<string> &', 'static': 0, 'pointer': 0, 'desc': 'Variable 2'}]
Doxygen for meth4:
/**********************************
* Method 4 description
*
* @return Return value
*********************************/
Return type for meth5:
void *
Doxygen type for prop1:
/// prop1 description
Type for prop5:
int
Namespace for AlphaClass is:
Alpha
Return type for alphaMethod is:
void
Namespace for OmegaClass is:
Alpha::Omega
Type for omegaString is:
string
Free functions are:
sampleFreeFunction
anotherFreeFunction
#includes are:
<vector>
<string>
#defines are:
DEF_1 1
OS_NAME "Linux"
Contributors
------------
* Chris Love
* HartsAntler

43
cppParser/setup.py Executable file
View File

@ -0,0 +1,43 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, glob
from distutils.core import setup
DESCRIPTION = (
'Parse C++ header files and generate a data structure '
'representing the class'
)
CLASSIFIERS = [
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Programming Language :: C++',
'License :: OSI Approved :: BSD License',
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Topic :: Software Development',
'Topic :: Software Development :: Code Generators',
'Topic :: Software Development :: Compilers',
'Topic :: Software Development :: Disassemblers'
]
setup(
name = 'CppHeaderParser',
version = '2.4',
author = 'Jashua Cloutier',
author_email = 'jashuac@bellsouth.net',
url = 'http://senexcanis.com/open-source/cppheaderparser/',
description = DESCRIPTION,
long_description = open('README.txt').read(),
license = 'BSD',
platforms = 'Platform Independent',
packages = ['CppHeaderParser'],
keywords = 'c++ header parser ply',
classifiers = CLASSIFIERS,
requires = ['ply'],
package_data = { 'CppHeaderParser': ['README', 'README.html', 'doc/*.*', 'examples/*.*'], },
)

View File

@ -1,8 +1,11 @@
#!/usr/bin/python
import lutinDebug as debug
import sys
import CppHeaderParser
import lutinTools
# TODO: Try a generic (system-wide) import of these modules first ...
sys.path.append(lutinTools.GetCurrentPath(__file__) + "/ply/ply/")
sys.path.append(lutinTools.GetCurrentPath(__file__) + "/cppParser/CppHeaderParser/")
import CppHeaderParser
def writeExpendSize(data, size) :
ret = data
@ -10,25 +13,25 @@ def writeExpendSize(data, size) :
ret += " "
return ret
def displayReductFunction(className, function, file, classement, sizeReturn, sizefunction) :
lineData = "\t" + classement + " "
def displayReductFunction(function, file, classement, sizeReturn, sizefunction) :
lineData = classement + " "
if function['destructor'] :
lineData += writeExpendSize("", sizeReturn+1)
elif function['constructor'] :
lineData += writeExpendSize("", sizeReturn)
lineData += "~"
elif function['constructor'] :
lineData += writeExpendSize("", sizeReturn+1)
else :
lineData += writeExpendSize(function["rtnType"], sizeReturn+1)
lineData += writeExpendSize(function["name"], sizefunction+1)
lineData += "("
file.write(lineData);
file.write("\t" + lineData);
parameterPos = len(lineData);
isFirst = True
for param in function["parameters"]:
if isFirst == False:
file.write(",\n")
file.write(",\n\t")
file.write(writeExpendSize("",parameterPos))
file.write(param['type'])
if param['name'] != "":
@ -38,6 +41,52 @@ def displayReductFunction(className, function, file, classement, sizeReturn, siz
file.write(");")
file.write("\n")
def displayFunction(namespace, function, file, classement, sizeReturn, sizefunction) :
lineData = ""
if namespace != "":
lineData = namespace + "::"
if function['destructor'] :
lineData += "~"
lineData += function["name"] + "( ... )"
file.write(lineData + "\n")
for iii in range(0, len(lineData)):
file.write(".")
file.write("\n\n")
if function['destructor'] :
lineData = "~"
elif function['constructor'] :
lineData = ""
else :
lineData = function["rtnType"] + " "
lineData += function["name"]
lineData += "("
file.write("\t" + lineData);
parameterPos = len(lineData);
isFirst = True
for param in function["parameters"]:
if isFirst == False:
file.write(",\n\t")
file.write(writeExpendSize("",parameterPos))
file.write(param['type'])
if param['name'] != "":
file.write(" ")
file.write(param['name'])
isFirst = False
file.write(");")
file.write("\n\n")
if "doxygen" in function:
# TODO : parse doxygen ...
file.write(function["doxygen"])
file.write("\n")
file.write("\n")
def calsulateSizeFunction(function, size) :
if len(function["name"]) > size:
return len(function["name"])+1
@ -58,7 +107,7 @@ def GenerateDocFile(filename, outFolder) :
lutinTools.CreateDirectoryOfFile(outFolder+"/");
for element in metaData.classes:
classFileName = outFolder + "/";
classFileName = outFolder + "/"
localClass = metaData.classes[element]
if localClass['namespace'] == "":
className = localClass['name']
@ -96,15 +145,29 @@ def GenerateDocFile(filename, outFolder) :
# display all functions :
# TODO: ...
for function in localClass["methods"]["public"]:
displayReductFunction(localClass['name'], function, file, "public: ", sizeReturn, sizefunction)
displayReductFunction(function, file, "public: ", sizeReturn, sizefunction)
for function in localClass["methods"]["protected"]:
displayReductFunction(localClass['name'], function, file, "protected:", sizeReturn, sizefunction)
displayReductFunction(function, file, "protected:", sizeReturn, sizefunction)
for function in localClass["methods"]["private"]:
displayReductFunction(localClass['name'], function, file, "private: ", sizeReturn, sizefunction)
displayReductFunction(function, file, "private: ", sizeReturn, sizefunction)
file.write("\n")
file.write("\n")
if len(localClass['inherits']) != 0:
file.write("Object Hierarchy:\n")
file.write("-----------------\n")
file.write("\n")
for heritedClass in localClass['inherits']:
file.write("\t" + heritedClass['class'] + "\n")
file.write("\t |\n")
file.write("\t +--> " + localClass['name'] + "\n")
file.write("\n")
file.write("\n")
"""
file.write("Signals:\n")
file.write("--------\n")
file.write("\n")
@ -116,22 +179,34 @@ def GenerateDocFile(filename, outFolder) :
file.write("\n")
# display all configuration :
# TODO: ...
"""
file.write("Description:\n")
file.write("------------\n")
file.write("\n")
# display Class description :
# TODO: ...
if "doxygen" in localClass:
file.write("Description:\n")
file.write("------------\n")
file.write("\n")
# display Class description :
file.write(localClass["doxygen"])
file.write("\n")
file.write("\n")
file.write("Detail:\n")
file.write("-------\n")
file.write("\n")
# display all the class internal functions :
# TODO: ...
for function in localClass["methods"]["public"]:
displayFunction(localClass['namespace'] , function, file, "public: ", sizeReturn, sizefunction)
file.write("\n________________________________________________________________________\n\n")
for function in localClass["methods"]["protected"]:
displayFunction(localClass['namespace'] , function, file, "protected:", sizeReturn, sizefunction)
file.write("\n________________________________________________________________________\n\n")
for function in localClass["methods"]["private"]:
displayFunction(localClass['namespace'] , function, file, "private: ", sizeReturn, sizefunction)
file.write("\n________________________________________________________________________\n\n")
if len(localClass['inherits']) != 0:
for heritedClass in localClass['inherits']:
debug.debug(" heritage : " + str(heritedClass['class']))

40
ply/ANNOUNCE Normal file
View File

@ -0,0 +1,40 @@
February 17, 2011
Announcing : PLY-3.4 (Python Lex-Yacc)
http://www.dabeaz.com/ply
I'm pleased to announce PLY-3.4--a pure Python implementation of the
common parsing tools lex and yacc. PLY-3.4 is a minor bug fix
release. It supports both Python 2 and Python 3.
If you are new to PLY, here are a few highlights:
- PLY is closely modeled after traditional lex/yacc. If you know how
to use these or similar tools in other languages, you will find
PLY to be comparable.
- PLY provides very extensive error reporting and diagnostic
information to assist in parser construction. The original
implementation was developed for instructional purposes. As
a result, the system tries to identify the most common types
of errors made by novice users.
- PLY provides full support for empty productions, error recovery,
precedence rules, and ambiguous grammars.
- Parsing is based on LR-parsing which is fast, memory efficient,
better suited to large grammars, and which has a number of nice
properties when dealing with syntax errors and other parsing
problems. Currently, PLY can build its parsing tables using
either SLR or LALR(1) algorithms.
More information about PLY can be obtained on the PLY webpage at:
http://www.dabeaz.com/ply
PLY is freely available.
Cheers,
David Beazley (http://www.dabeaz.com)

1093
ply/CHANGES Normal file

File diff suppressed because it is too large Load Diff

22
ply/PKG-INFO Normal file
View File

@ -0,0 +1,22 @@
Metadata-Version: 1.0
Name: ply
Version: 3.4
Summary: Python Lex & Yacc
Home-page: http://www.dabeaz.com/ply/
Author: David Beazley
Author-email: dave@dabeaz.com
License: BSD
Description:
PLY is yet another implementation of lex and yacc for Python. Some notable
features include the fact that it's implemented entirely in Python and it
uses LALR(1) parsing which is efficient and well suited for larger grammars.
PLY provides most of the standard lex/yacc features including support for empty
productions, precedence rules, error recovery, and support for ambiguous grammars.
PLY is extremely easy to use and provides very extensive error checking.
It is compatible with both Python 2 and Python 3.
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 2

271
ply/README Normal file
View File

@ -0,0 +1,271 @@
PLY (Python Lex-Yacc) Version 3.4
Copyright (C) 2001-2011,
David M. Beazley (Dabeaz LLC)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the David Beazley or Dabeaz LLC may be used to
endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Introduction
============
PLY is a 100% Python implementation of the common parsing tools lex
and yacc. Here are a few highlights:
- PLY is very closely modeled after traditional lex/yacc.
If you know how to use these tools in C, you will find PLY
to be similar.
- PLY provides *very* extensive error reporting and diagnostic
information to assist in parser construction. The original
implementation was developed for instructional purposes. As
a result, the system tries to identify the most common types
of errors made by novice users.
- PLY provides full support for empty productions, error recovery,
precedence specifiers, and moderately ambiguous grammars.
- Parsing is based on LR-parsing which is fast, memory efficient,
better suited to large grammars, and which has a number of nice
properties when dealing with syntax errors and other parsing problems.
Currently, PLY builds its parsing tables using the LALR(1)
algorithm used in yacc.
- PLY uses Python introspection features to build lexers and parsers.
This greatly simplifies the task of parser construction since it reduces
the number of files and eliminates the need to run a separate lex/yacc
tool before running your program.
- PLY can be used to build parsers for "real" programming languages.
Although it is not ultra-fast due to its Python implementation,
PLY can be used to parse grammars consisting of several hundred
rules (as might be found for a language like C). The lexer and LR
parser are also reasonably efficient when parsing typically
sized programs. People have used PLY to build parsers for
C, C++, ADA, and other real programming languages.
How to Use
==========
PLY consists of two files : lex.py and yacc.py. These are contained
within the 'ply' directory which may also be used as a Python package.
To use PLY, simply copy the 'ply' directory to your project and import
lex and yacc from the associated 'ply' package. For example:
import ply.lex as lex
import ply.yacc as yacc
Alternatively, you can copy just the files lex.py and yacc.py
individually and use them as modules. For example:
import lex
import yacc
The file setup.py can be used to install ply using distutils.
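
For example, a typical installation command (adjust the interpreter
for your environment) is:

    python setup.py install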
The file doc/ply.html contains complete documentation on how to use
the system.
The example directory contains several different examples including a
PLY specification for ANSI C as given in K&R 2nd Ed.
A simple example is found at the end of this document.
Requirements
============
PLY requires the use of Python 2.2 or greater. However, you should
use the latest Python release if possible. It should work on just
about any platform. PLY has been tested with both CPython and Jython.
It also seems to work with IronPython.
Resources
=========
More information about PLY can be obtained on the PLY webpage at:
http://www.dabeaz.com/ply
For a detailed overview of parsing theory, consult the excellent
book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown
may also be useful.
A Google group for PLY can be found at
http://groups.google.com/group/ply-hack
Acknowledgments
===============
A special thanks is in order for all of the students in CS326 who
suffered through about 25 different versions of these tools :-).
The CHANGES file acknowledges those who have contributed patches.
Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x.
Andrew Waters and Markus Schoepflin were instrumental in reporting bugs
and testing a revised LALR(1) implementation for PLY-2.0.
Special Note for PLY-3.0
========================
PLY-3.0 is the first PLY release to support Python 3. However, backwards
compatibility with Python 2.2 is still preserved. PLY provides dual
Python 2/3 compatibility by restricting its implementation to a common
subset of basic language features. You should not convert PLY using
2to3--it is not necessary and may in fact break the implementation.
Example
=======
Here is a simple example showing a PLY implementation of a calculator
with variables.
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables.
# -----------------------------------------------------------------------------

tokens = (
    'NAME','NUMBER',
    'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
    'LPAREN','RPAREN',
    )

# Tokens

t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

# Ignored characters
t_ignore = " \t"

def t_newline(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Precedence rules for the arithmetic operators
precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','UMINUS'),
    )

# dictionary of names (for storing variables)
names = { }

def p_statement_assign(p):
    'statement : NAME EQUALS expression'
    names[p[1]] = p[3]

def p_statement_expr(p):
    'statement : expression'
    print(p[1])

def p_expression_binop(p):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    if p[2] == '+'  : p[0] = p[1] + p[3]
    elif p[2] == '-': p[0] = p[1] - p[3]
    elif p[2] == '*': p[0] = p[1] * p[3]
    elif p[2] == '/': p[0] = p[1] / p[3]

def p_expression_uminus(p):
    'expression : MINUS expression %prec UMINUS'
    p[0] = -p[2]

def p_expression_group(p):
    'expression : LPAREN expression RPAREN'
    p[0] = p[2]

def p_expression_number(p):
    'expression : NUMBER'
    p[0] = p[1]

def p_expression_name(p):
    'expression : NAME'
    try:
        p[0] = names[p[1]]
    except LookupError:
        print("Undefined name '%s'" % p[1])
        p[0] = 0

def p_error(p):
    print("Syntax error at '%s'" % p.value)

import ply.yacc as yacc
yacc.yacc()

while 1:
    try:
        s = raw_input('calc > ')   # use input() on Python 3
    except EOFError:
        break
    yacc.parse(s)
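
A sample session with this calculator might look like the following
(illustrative; the prompt string comes from the raw_input() call above,
and the "Undefined name" message from p_expression_name()):

    calc > x = 3 * (4 + 5)
    calc > x
    27
    calc > y
    Undefined name 'y'
    0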
Bug Reports and Patches
=======================
My goal with PLY is to simply have a decent lex/yacc implementation
for Python. As a general rule, I don't spend huge amounts of time
working on it unless I receive very specific bug reports and/or
patches to fix problems. I also try to incorporate submitted feature
requests and enhancements into each new version. To contact me about
bugs and/or new features, please send email to dave@dabeaz.com.
In addition there is a Google group for discussing PLY related issues at
http://groups.google.com/group/ply-hack
-- Dave

16
ply/TODO Normal file
View File

@ -0,0 +1,16 @@
The PLY to-do list:
1. Finish writing the C Preprocessor module. Started in the
file ply/cpp.py
2. Create and document libraries of useful tokens.
3. Expand the examples/yply tool that parses bison/yacc
files.
4. Think of various diabolical things to do with the
new yacc internals. For example, it is now possible
to specify grammars using completely different schemes
than the reflection approach used by PLY.

874
ply/doc/internal.html Normal file
View File

@ -0,0 +1,874 @@
<html>
<head>
<title>PLY Internals</title>
</head>
<body bgcolor="#ffffff">
<h1>PLY Internals</h1>
<b>
David M. Beazley <br>
dave@dabeaz.com<br>
</b>
<p>
<b>PLY Version: 3.0</b>
<p>
<!-- INDEX -->
<div class="sectiontoc">
<ul>
<li><a href="#internal_nn1">Introduction</a>
<li><a href="#internal_nn2">Grammar Class</a>
<li><a href="#internal_nn3">Productions</a>
<li><a href="#internal_nn4">LRItems</a>
<li><a href="#internal_nn5">LRTable</a>
<li><a href="#internal_nn6">LRGeneratedTable</a>
<li><a href="#internal_nn7">LRParser</a>
<li><a href="#internal_nn8">ParserReflect</a>
<li><a href="#internal_nn9">High-level operation</a>
</ul>
</div>
<!-- INDEX -->
<H2><a name="internal_nn1"></a>1. Introduction</H2>
This document describes classes and functions that make up the internal
operation of PLY. Using this programming interface, it is possible to
manually build a parser using a different interface specification
than what PLY normally uses. For example, you could build a grammar
from information parsed in a completely different input format. Some of
these objects may be useful for building more advanced parsing engines
such as GLR.
<p>
It should be stressed that using PLY at this level is not for the
faint of heart. Generally, it's assumed that you know a bit of
the underlying compiler theory and how an LR parser is put together.
<H2><a name="internal_nn2"></a>2. Grammar Class</H2>
The file <tt>ply.yacc</tt> defines a class <tt>Grammar</tt> that
is used to hold and manipulate information about a grammar
specification. It encapsulates the same basic information
about a grammar that is put into a YACC file including
the list of tokens, precedence rules, and grammar rules.
Various operations are provided to perform different validations
on the grammar. In addition, there are operations to compute
the first and follow sets that are needed by the various table
generation algorithms.
<p>
<tt><b>Grammar(terminals)</b></tt>
<blockquote>
Creates a new grammar object. <tt>terminals</tt> is a list of strings
specifying the terminals for the grammar. An instance <tt>g</tt> of
<tt>Grammar</tt> has the following methods:
</blockquote>
<p>
<b><tt>g.set_precedence(term,assoc,level)</tt></b>
<blockquote>
Sets the precedence level and associativity for a given terminal <tt>term</tt>.
<tt>assoc</tt> is one of <tt>'right'</tt>,
<tt>'left'</tt>, or <tt>'nonassoc'</tt> and <tt>level</tt> is a positive integer. The higher
the value of <tt>level</tt>, the higher the precedence. Here is an example of typical
precedence settings:
<pre>
g.set_precedence('PLUS', 'left',1)
g.set_precedence('MINUS', 'left',1)
g.set_precedence('TIMES', 'left',2)
g.set_precedence('DIVIDE','left',2)
g.set_precedence('UMINUS','left',3)
</pre>
This method must be called prior to adding any productions to the
grammar with <tt>g.add_production()</tt>. The precedence of individual grammar
rules is determined by the precedence of the right-most terminal.
</blockquote>
<p>
<b><tt>g.add_production(name,syms,func=None,file='',line=0)</tt></b>
<blockquote>
Adds a new grammar rule. <tt>name</tt> is the name of the rule,
<tt>syms</tt> is a list of symbols making up the right hand
side of the rule, <tt>func</tt> is the function to call when
reducing the rule. <tt>file</tt> and <tt>line</tt> specify
the filename and line number of the rule and are used for
generating error messages.
<p>
The list of symbols in <tt>syms</tt> may include character
literals and <tt>%prec</tt> specifiers. Here are some
examples:
<pre>
g.add_production('expr',['expr','PLUS','term'],func,file,line)
g.add_production('expr',['expr','"+"','term'],func,file,line)
g.add_production('expr',['MINUS','expr','%prec','UMINUS'],func,file,line)
</pre>
<p>
If any kind of error is detected, a <tt>GrammarError</tt> exception
is raised with a message indicating the reason for the failure.
</blockquote>
<p>
<b><tt>g.set_start(start=None)</tt></b>
<blockquote>
Sets the starting rule for the grammar. <tt>start</tt> is a string
specifying the name of the start rule. If <tt>start</tt> is omitted,
the first grammar rule added with <tt>add_production()</tt> is taken to be
the starting rule. This method must always be called after all
productions have been added.
</blockquote>
<p>
<b><tt>g.find_unreachable()</tt></b>
<blockquote>
Diagnostic function. Returns a list of all unreachable non-terminals
defined in the grammar. This is used to identify inactive parts of
the grammar specification.
</blockquote>
<p>
<b><tt>g.infinite_cycle()</tt></b>
<blockquote>
Diagnostic function. Returns a list of all non-terminals in the
grammar that result in an infinite cycle. This condition occurs if
there is no way for a grammar rule to expand to a string containing
only terminal symbols.
</blockquote>
<p>
<b><tt>g.undefined_symbols()</tt></b>
<blockquote>
Diagnostic function. Returns a list of tuples <tt>(name, prod)</tt>
corresponding to undefined symbols in the grammar. <tt>name</tt> is the
name of the undefined symbol and <tt>prod</tt> is an instance of
<tt>Production</tt> which has information about the production rule
where the undefined symbol was used.
</blockquote>
<p>
<b><tt>g.unused_terminals()</tt></b>
<blockquote>
Diagnostic function. Returns a list of terminals that were defined,
but never used in the grammar.
</blockquote>
<p>
<b><tt>g.unused_rules()</tt></b>
<blockquote>
Diagnostic function. Returns a list of <tt>Production</tt> instances
corresponding to production rules that were defined in the grammar,
but never used anywhere. This is slightly different
than <tt>find_unreachable()</tt>.
</blockquote>
<p>
<b><tt>g.unused_precedence()</tt></b>
<blockquote>
Diagnostic function. Returns a list of tuples <tt>(term, assoc)</tt>
corresponding to precedence rules that were set, but never used in the
grammar. <tt>term</tt> is the terminal name and <tt>assoc</tt> is the
precedence associativity (e.g., <tt>'left'</tt>, <tt>'right'</tt>,
or <tt>'nonassoc'</tt>).
</blockquote>
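<p>
As a brief illustration (a sketch, not taken from the PLY sources), the
diagnostic methods are typically driven like this after all productions
have been added to a grammar <tt>g</tt>:

<blockquote>
<pre>
for term in g.unused_terminals():
    print("warning: unused terminal %s" % term)

for name, prod in g.undefined_symbols():
    print("error: %s used in %s but never defined" % (name, prod))

for term, assoc in g.unused_precedence():
    print("warning: precedence '%s' (%s) never used" % (term, assoc))
</pre>
</blockquote>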
<p>
<b><tt>g.compute_first()</tt></b>
<blockquote>
Compute all of the first sets for all symbols in the grammar. Returns a dictionary
mapping symbol names to a list of all first symbols.
</blockquote>
<p>
<b><tt>g.compute_follow()</tt></b>
<blockquote>
Compute all of the follow sets for all non-terminals in the grammar.
The follow set is the set of all possible symbols that might follow a
given non-terminal. Returns a dictionary mapping non-terminal names
to a list of symbols.
</blockquote>
<p>
<b><tt>g.build_lritems()</tt></b>
<blockquote>
Calculates all of the LR items for all productions in the grammar. This
step is required before using the grammar for any kind of table generation.
See the section on LR items below.
</blockquote>
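<p>
Putting the above methods together, a minimal hedged sketch (the grammar
and terminal names here are made up for illustration) looks like this:

<blockquote>
<pre>
from ply.yacc import Grammar

g = Grammar(['NUMBER', 'PLUS'])
g.set_precedence('PLUS', 'left', 1)      # precedence before any productions
g.add_production('expr', ['expr', 'PLUS', 'expr'])
g.add_production('expr', ['NUMBER'])
g.set_start('expr')                      # after all productions are added

first  = g.compute_first()               # e.g. first['expr'] contains 'NUMBER'
follow = g.compute_follow()              # e.g. follow['expr'] contains 'PLUS'
g.build_lritems()                        # required before table generation
</pre>
</blockquote>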
<p>
The following attributes are set by the above methods and may be useful
in code that works with the grammar. All of these attributes should be
assumed to be read-only. Changing their values directly will likely
break the grammar.
<p>
<b><tt>g.Productions</tt></b>
<blockquote>
A list of all productions added. The first entry is reserved for
a production representing the starting rule. The objects in this list
are instances of the <tt>Production</tt> class, described shortly.
</blockquote>
<p>
<b><tt>g.Prodnames</tt></b>
<blockquote>
A dictionary mapping the names of nonterminals to a list of all
productions of that nonterminal.
</blockquote>
<p>
<b><tt>g.Terminals</tt></b>
<blockquote>
A dictionary mapping the names of terminals to a list of the
production numbers where they are used.
</blockquote>
<p>
<b><tt>g.Nonterminals</tt></b>
<blockquote>
A dictionary mapping the names of nonterminals to a list of the
production numbers where they are used.
</blockquote>
<p>
<b><tt>g.First</tt></b>
<blockquote>
A dictionary representing the first sets for all grammar symbols. This is
computed and returned by the <tt>compute_first()</tt> method.
</blockquote>
<p>
<b><tt>g.Follow</tt></b>
<blockquote>
A dictionary representing the follow sets for all grammar rules. This is
computed and returned by the <tt>compute_follow()</tt> method.
</blockquote>
<p>
<b><tt>g.Start</tt></b>
<blockquote>
Starting symbol for the grammar. Set by the <tt>set_start()</tt> method.
</blockquote>
For the purposes of debugging, a <tt>Grammar</tt> object supports the <tt>__len__()</tt> and
<tt>__getitem__()</tt> special methods. Accessing <tt>g[n]</tt> returns the nth production
from the grammar.
<H2><a name="internal_nn3"></a>3. Productions</H2>
<tt>Grammar</tt> objects store grammar rules as instances of a <tt>Production</tt> class. This
class has no public constructor--you should only create productions by calling <tt>Grammar.add_production()</tt>.
The following attributes are available on a <tt>Production</tt> instance <tt>p</tt>.
<p>
<b><tt>p.name</tt></b>
<blockquote>
The name of the production. For a grammar rule such as <tt>A : B C D</tt>, this is <tt>'A'</tt>.
</blockquote>
<p>
<b><tt>p.prod</tt></b>
<blockquote>
A tuple of symbols making up the right-hand side of the production. For a grammar rule such as <tt>A : B C D</tt>, this is <tt>('B','C','D')</tt>.
</blockquote>
<p>
<b><tt>p.number</tt></b>
<blockquote>
Production number. An integer containing the index of the production in the grammar's <tt>Productions</tt> list.
</blockquote>
<p>
<b><tt>p.func</tt></b>
<blockquote>
The name of the reduction function associated with the production.
This is the function that will execute when reducing the entire
grammar rule during parsing.
</blockquote>
<p>
<b><tt>p.callable</tt></b>
<blockquote>
The callable object associated with the name in <tt>p.func</tt>. This is <tt>None</tt>
unless the production has been bound using <tt>bind()</tt>.
</blockquote>
<p>
<b><tt>p.file</tt></b>
<blockquote>
Filename associated with the production. Typically this is the file where the production was defined. Used for error messages.
</blockquote>
<p>
<b><tt>p.lineno</tt></b>
<blockquote>
Line number associated with the production. Typically this is the line number in <tt>p.file</tt> where the production was defined. Used for error messages.
</blockquote>
<p>
<b><tt>p.prec</tt></b>
<blockquote>
Precedence and associativity associated with the production. This is a tuple <tt>(assoc,level)</tt> where
<tt>assoc</tt> is one of <tt>'left'</tt>,<tt>'right'</tt>, or <tt>'nonassoc'</tt> and <tt>level</tt> is
an integer. This value is determined by the precedence of the right-most terminal symbol in the production
or by use of the <tt>%prec</tt> specifier when adding the production.
</blockquote>
<p>
<b><tt>p.usyms</tt></b>
<blockquote>
A list of all unique symbols found in the production.
</blockquote>
<p>
<b><tt>p.lr_items</tt></b>
<blockquote>
A list of all LR items for this production. This attribute only has a meaningful value if the
<tt>Grammar.build_lritems()</tt> method has been called. The items in this list are
instances of <tt>LRItem</tt> described below.
</blockquote>
<p>
<b><tt>p.lr_next</tt></b>
<blockquote>
The head of a linked-list representation of the LR items in <tt>p.lr_items</tt>.
This attribute only has a meaningful value if the <tt>Grammar.build_lritems()</tt>
method has been called. Each <tt>LRItem</tt> instance has a <tt>lr_next</tt> attribute
to move to the next item. The list is terminated by <tt>None</tt>.
</blockquote>
<p>
<b><tt>p.bind(dict)</tt></b>
<blockquote>
Binds the production function name in <tt>p.func</tt> to a callable object in
<tt>dict</tt>. This operation is typically carried out in the last step
prior to running the parsing engine and is needed since parsing tables are typically
read from files which only include the function names, not the functions themselves.
</blockquote>
<P>
<tt>Production</tt> objects support
the <tt>__len__()</tt>, <tt>__getitem__()</tt>, and <tt>__str__()</tt>
special methods.
<tt>len(p)</tt> returns the number of symbols in <tt>p.prod</tt>
and <tt>p[n]</tt> is the same as <tt>p.prod[n]</tt>.
<H2><a name="internal_nn4"></a>4. LRItems</H2>
The construction of parsing tables in an LR-based parser generator is primarily
done over a set of "LR Items". An LR item represents a stage of parsing one
of the grammar rules. To compute the LR items, it is first necessary to
call <tt>Grammar.build_lritems()</tt>. Once this step is complete, all of the productions
in the grammar will have their LR items attached to them.
<p>
Here is an interactive example that shows what LR items look like if you
interactively experiment. In this example, <tt>g</tt> is a <tt>Grammar</tt>
object.
<blockquote>
<pre>
>>> <b>g.build_lritems()</b>
>>> <b>p = g[1]</b>
>>> <b>p</b>
Production(statement -> ID = expr)
>>>
</pre>
</blockquote>
In the above code, <tt>p</tt> represents the first grammar rule. In
this case, a rule <tt>'statement -> ID = expr'</tt>.
<p>
Now, let's look at the LR items for <tt>p</tt>.
<blockquote>
<pre>
>>> <b>p.lr_items</b>
[LRItem(statement -> . ID = expr),
LRItem(statement -> ID . = expr),
LRItem(statement -> ID = . expr),
LRItem(statement -> ID = expr .)]
>>>
</pre>
</blockquote>
In each LR item, the dot (.) represents a specific stage of parsing. In each LR item, the dot
is advanced by one symbol. It is only when the dot reaches the very end that a production
is successfully parsed.
<p>
An instance <tt>lr</tt> of <tt>LRItem</tt> has the following
attributes that hold information related to that specific stage of
parsing.
<p>
<b><tt>lr.name</tt></b>
<blockquote>
The name of the grammar rule. For example, <tt>'statement'</tt> in the above example.
</blockquote>
<p>
<b><tt>lr.prod</tt></b>
<blockquote>
A tuple of symbols representing the right-hand side of the production, including the
special <tt>'.'</tt> character. For example, <tt>('ID','.','=','expr')</tt>.
</blockquote>
<p>
<b><tt>lr.number</tt></b>
<blockquote>
An integer representing the production number in the grammar.
</blockquote>
<p>
<b><tt>lr.usyms</tt></b>
<blockquote>
A set of unique symbols in the production. Inherited from the original <tt>Production</tt> instance.
</blockquote>
<p>
<b><tt>lr.lr_index</tt></b>
<blockquote>
An integer representing the position of the dot (.). You should never use <tt>lr.prod.index()</tt>
to search for it--the result will be wrong if the grammar happens to also use (.) as a character
literal.
</blockquote>
<p>
<b><tt>lr.lr_after</tt></b>
<blockquote>
A list of all productions that can legally appear immediately to the right of the
dot (.). This list contains <tt>Production</tt> instances. This attribute
represents all of the possible branches a parse can take from the current position.
For example, suppose that <tt>lr</tt> represents a stage immediately before
an expression like this:
<pre>
>>> <b>lr</b>
LRItem(statement -> ID = . expr)
>>>
</pre>
Then, the value of <tt>lr.lr_after</tt> might look like this, showing all productions that
can legally appear next:
<pre>
>>> <b>lr.lr_after</b>
[Production(expr -> expr PLUS expr),
Production(expr -> expr MINUS expr),
Production(expr -> expr TIMES expr),
Production(expr -> expr DIVIDE expr),
Production(expr -> MINUS expr),
Production(expr -> LPAREN expr RPAREN),
Production(expr -> NUMBER),
Production(expr -> ID)]
>>>
</pre>
</blockquote>
<p>
<b><tt>lr.lr_before</tt></b>
<blockquote>
The grammar symbol that appears immediately before the dot (.) or <tt>None</tt> if
at the beginning of the parse.
</blockquote>
<p>
<b><tt>lr.lr_next</tt></b>
<blockquote>
A link to the next LR item, representing the next stage of the parse. <tt>None</tt> if <tt>lr</tt>
is the last LR item.
</blockquote>
<tt>LRItem</tt> instances also support the <tt>__len__()</tt> and <tt>__getitem__()</tt> special methods.
<tt>len(lr)</tt> returns the number of items in <tt>lr.prod</tt> including the dot (.). <tt>lr[n]</tt>
returns <tt>lr.prod[n]</tt>.
<p>
It goes without saying that all of the attributes associated with LR
items should be assumed to be read-only. Modifications will very
likely create a small black-hole that will consume you and your code.
<H2><a name="internal_nn5"></a>5. LRTable</H2>
The <tt>LRTable</tt> class is used to represent LR parsing table data. This
minimally includes the production list, action table, and goto table.
<p>
<b><tt>LRTable()</tt></b>
<blockquote>
Create an empty LRTable object. This object contains only the information needed to
run an LR parser.
</blockquote>
An instance <tt>lrtab</tt> of <tt>LRTable</tt> has the following methods:
<p>
<b><tt>lrtab.read_table(module)</tt></b>
<blockquote>
Populates the LR table with information from the module specified in <tt>module</tt>.
<tt>module</tt> is either a module object already loaded with <tt>import</tt> or
the name of a Python module. If it's a string containing a module name, it is
loaded and parsing data is extracted. Returns the signature value that was used
when initially writing the tables. Raises a <tt>VersionError</tt> exception if
the module was created using an incompatible version of PLY.
</blockquote>
<p>
<b><tt>lrtab.bind_callables(dict)</tt></b>
<blockquote>
This binds all of the function names used in productions to callable objects
found in the dictionary <tt>dict</tt>. During table generation and when reading
LR tables from files, PLY only uses the names of action functions such as <tt>'p_expr'</tt>,
<tt>'p_statement'</tt>, etc. In order to actually run the parser, these names
have to be bound to callable objects. This method is always called prior to
running a parser.
</blockquote>
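<p>
For example (a sketch; it assumes a table module <tt>parsetab.py</tt>
written by an earlier run and that the <tt>p_*</tt> action functions live
in the current module):

<blockquote>
<pre>
from ply.yacc import LRTable

lrtab = LRTable()
signature = lrtab.read_table('parsetab')   # may raise VersionError
lrtab.bind_callables(globals())            # bind 'p_expr', ... to functions
</pre>
</blockquote>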
After <tt>lrtab</tt> has been populated, the following attributes are defined.
<p>
<b><tt>lrtab.lr_method</tt></b>
<blockquote>
The LR parsing method used (e.g., <tt>'LALR'</tt>)
</blockquote>
<p>
<b><tt>lrtab.lr_productions</tt></b>
<blockquote>
The production list. If the parsing tables have been newly
constructed, this will be a list of <tt>Production</tt> instances. If
the parsing tables have been read from a file, it's a list
of <tt>MiniProduction</tt> instances. This, together
with <tt>lr_action</tt> and <tt>lr_goto</tt> contain all of the
information needed by the LR parsing engine.
</blockquote>
<p>
<b><tt>lrtab.lr_action</tt></b>
<blockquote>
The LR action dictionary that implements the underlying state machine.
The keys of this dictionary are the LR states.
</blockquote>
<p>
<b><tt>lrtab.lr_goto</tt></b>
<blockquote>
The LR goto table that contains information about grammar rule reductions.
</blockquote>
<H2><a name="internal_nn6"></a>6. LRGeneratedTable</H2>
The <tt>LRGeneratedTable</tt> class represents constructed LR parsing tables on a
grammar. It is a subclass of <tt>LRTable</tt>.
<p>
<b><tt>LRGeneratedTable(grammar, method='LALR',log=None)</tt></b>
<blockquote>
Create the LR parsing tables on a grammar. <tt>grammar</tt> is an instance of <tt>Grammar</tt>,
<tt>method</tt> is a string with the parsing method (<tt>'SLR'</tt> or <tt>'LALR'</tt>), and
<tt>log</tt> is a logger object used to write debugging information. The debugging information
written to <tt>log</tt> is the same as what appears in the <tt>parser.out</tt> file created
by yacc. By supplying a custom logger with a different message format, it is possible to get
more information (e.g., the line number in <tt>yacc.py</tt> used for issuing each line of
output in the log). The result is an instance of <tt>LRGeneratedTable</tt>.
</blockquote>
<p>
An instance <tt>lr</tt> of <tt>LRGeneratedTable</tt> has the following attributes.
<p>
<b><tt>lr.grammar</tt></b>
<blockquote>
A link to the Grammar object used to construct the parsing tables.
</blockquote>
<p>
<b><tt>lr.lr_method</tt></b>
<blockquote>
The LR parsing method used (e.g., <tt>'LALR'</tt>)
</blockquote>
<p>
<b><tt>lr.lr_productions</tt></b>
<blockquote>
A reference to <tt>grammar.Productions</tt>. This, together with <tt>lr_action</tt> and <tt>lr_goto</tt>
contain all of the information needed by the LR parsing engine.
</blockquote>
<p>
<b><tt>lr.lr_action</tt></b>
<blockquote>
The LR action dictionary that implements the underlying state machine. The keys of this dictionary are
the LR states.
</blockquote>
<p>
<b><tt>lr.lr_goto</tt></b>
<blockquote>
The LR goto table that contains information about grammar rule reductions.
</blockquote>
<p>
<b><tt>lr.sr_conflicts</tt></b>
<blockquote>
A list of tuples <tt>(state,token,resolution)</tt> identifying all shift/reduce conflicts. <tt>state</tt> is the LR state
number where the conflict occurred, <tt>token</tt> is the token causing the conflict, and <tt>resolution</tt> is
a string describing the resolution taken. <tt>resolution</tt> is either <tt>'shift'</tt> or <tt>'reduce'</tt>.
</blockquote>
<p>
<b><tt>lr.rr_conflicts</tt></b>
<blockquote>
A list of tuples <tt>(state,rule,rejected)</tt> identifying all reduce/reduce conflicts. <tt>state</tt> is the
LR state number where the conflict occurred, <tt>rule</tt> is the production rule that was selected
and <tt>rejected</tt> is the production rule that was rejected. Both <tt>rule</tt> and <tt>rejected</tt> are
instances of <tt>Production</tt>. They can be inspected to provide the user with more information.
</blockquote>
<p>
There are two public methods of <tt>LRGeneratedTable</tt>.
<p>
<b><tt>lr.write_table(modulename,outputdir="",signature="")</tt></b>
<blockquote>
Writes the LR parsing table information to a Python module. <tt>modulename</tt> is a string
specifying the name of a module such as <tt>"parsetab"</tt>. <tt>outputdir</tt> is the name of a
directory where the module should be created. <tt>signature</tt> is a string representing a
grammar signature that's written into the output file. This can be used to detect when
the data stored in a module file is out-of-sync with the grammar specification (and that
the tables need to be regenerated). If <tt>modulename</tt> is a string <tt>"parsetab"</tt>,
this function creates a file called <tt>parsetab.py</tt>. If the module name represents a
package such as <tt>"foo.bar.parsetab"</tt>, then only the last component, <tt>"parsetab"</tt> is
used.
</blockquote>
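<p>
A short sketch tying these pieces together (assuming <tt>g</tt> is a fully
populated <tt>Grammar</tt> as in the earlier example, and that the
signature string is your own choice):

<blockquote>
<pre>
from ply.yacc import LRGeneratedTable

lrtab = LRGeneratedTable(g, method='LALR')
for state, token, resolution in lrtab.sr_conflicts:
    print("shift/reduce in state %d on %s (%s)" % (state, token, resolution))
lrtab.write_table('parsetab', outputdir='.', signature='my-grammar-v1')
</pre>
</blockquote>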
<H2><a name="internal_nn7"></a>7. LRParser</H2>
The <tt>LRParser</tt> class implements the low-level LR parsing engine.
<p>
<b><tt>LRParser(lrtab, error_func)</tt></b>
<blockquote>
Create an LRParser. <tt>lrtab</tt> is an instance of <tt>LRTable</tt>
containing the LR production and state tables. <tt>error_func</tt> is the
error function to invoke in the event of a parsing error.
</blockquote>
An instance <tt>p</tt> of <tt>LRParser</tt> has the following methods:
<p>
<b><tt>p.parse(input=None,lexer=None,debug=0,tracking=0,tokenfunc=None)</tt></b>
<blockquote>
Run the parser. <tt>input</tt> is a string, which if supplied is fed into the
lexer using its <tt>input()</tt> method. <tt>lexer</tt> is an instance of the
<tt>Lexer</tt> class to use for tokenizing. If not supplied, the last lexer
created with the <tt>lex</tt> module is used. <tt>debug</tt> is a boolean flag
that enables debugging. <tt>tracking</tt> is a boolean flag that tells the
parser to perform additional line number tracking. <tt>tokenfunc</tt> is a callable
function that returns the next token. If supplied, the parser will use it to get
all tokens.
</blockquote>
<p>
<b><tt>p.restart()</tt></b>
<blockquote>
Resets the parser state for a parse already in progress.
</blockquote>
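<p>
For illustration (a sketch: <tt>lrtab</tt> and an error function
<tt>p_error</tt> are assumed to come from the previous sections, and
<tt>mylexer</tt> is assumed to be a lexer built with <tt>ply.lex</tt>):

<blockquote>
<pre>
from ply.yacc import LRParser

parser = LRParser(lrtab, p_error)
result = parser.parse(input="x = 3 + 4", lexer=mylexer)
</pre>
</blockquote>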
<H2><a name="internal_nn8"></a>8. ParserReflect</H2>
<p>
The <tt>ParserReflect</tt> class is used to collect parser specification data
from a Python module or object. This class is what collects all of the
<tt>p_rule()</tt> functions in a PLY file, performs basic error checking,
and collects all of the needed information to build a grammar. Most of the
high-level PLY interface as used by the <tt>yacc()</tt> function is actually
implemented by this class.
<p>
<b><tt>ParserReflect(pdict, log=None)</tt></b>
<blockquote>
Creates a <tt>ParserReflect</tt> instance. <tt>pdict</tt> is a dictionary
containing parser specification data. This dictionary typically corresponds
to the module or class dictionary of code that implements a PLY parser.
<tt>log</tt> is a logger instance that will be used to report error
messages.
</blockquote>
An instance <tt>p</tt> of <tt>ParserReflect</tt> has the following methods:
<p>
<b><tt>p.get_all()</tt></b>
<blockquote>
Collect and store all required parsing information.
</blockquote>
<p>
<b><tt>p.validate_all()</tt></b>
<blockquote>
Validate all of the collected parsing information. This is a separate step
from <tt>p.get_all()</tt> as a performance optimization. To reduce
parser start-up time, a parser can elect to only validate the
parsing data when regenerating the parsing tables. The validation
step tries to collect as much information as possible rather than
raising an exception at the first sign of trouble. The attribute
<tt>p.error</tt> is set if there are any validation errors. The
value of this attribute is also returned.
</blockquote>
<p>
<b><tt>p.signature()</tt></b>
<blockquote>
Compute a signature representing the contents of the collected parsing
data. The signature value should change if anything in the parser
specification has changed in a way that would justify parser table
regeneration. This method can be called after <tt>p.get_all()</tt>,
but before <tt>p.validate_all()</tt>.
</blockquote>
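<p>
A compact sketch of the calling order described above (assuming the parser
specification lives in the current module):

<blockquote>
<pre>
from ply.yacc import ParserReflect

pinfo = ParserReflect(globals())
pinfo.get_all()
sig = pinfo.signature()     # legal here: after get_all(), before validate_all()
if pinfo.validate_all():
    raise SystemExit("invalid parser specification")
</pre>
</blockquote>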
The following attributes are set in the process of collecting data:
<p>
<b><tt>p.start</tt></b>
<blockquote>
The grammar start symbol, if any. Taken from <tt>pdict['start']</tt>.
</blockquote>
<p>
<b><tt>p.error_func</tt></b>
<blockquote>
The error handling function or <tt>None</tt>. Taken from <tt>pdict['p_error']</tt>.
</blockquote>
<p>
<b><tt>p.tokens</tt></b>
<blockquote>
The token list. Taken from <tt>pdict['tokens']</tt>.
</blockquote>
<p>
<b><tt>p.prec</tt></b>
<blockquote>
The precedence specifier. Taken from <tt>pdict['precedence']</tt>.
</blockquote>
<p>
<b><tt>p.preclist</tt></b>
<blockquote>
A parsed version of the precedence specifier. A list of tuples of the form
<tt>(token,assoc,level)</tt> where <tt>token</tt> is the terminal symbol,
<tt>assoc</tt> is the associativity (e.g., <tt>'left'</tt>) and <tt>level</tt>
is a numeric precedence level.
</blockquote>
<p>
<b><tt>p.grammar</tt></b>
<blockquote>
A list of tuples <tt>(name, rules)</tt> representing the grammar rules. <tt>name</tt> is the
name of a Python function or method in <tt>pdict</tt> that starts with <tt>"p_"</tt>.
<tt>rules</tt> is a list of tuples <tt>(filename,line,prodname,syms)</tt> representing
the grammar rules found in the documentation string of that function. <tt>filename</tt> and <tt>line</tt> contain location
information that can be used for debugging. <tt>prodname</tt> is the name of the
production. <tt>syms</tt> is the right-hand side of the production. If you have a
function like this
<pre>
def p_expr(p):
'''expr : expr PLUS expr
| expr MINUS expr
| expr TIMES expr
| expr DIVIDE expr'''
</pre>
then the corresponding entry in <tt>p.grammar</tt> might look like this:
<pre>
('p_expr', [ ('calc.py',10,'expr', ['expr','PLUS','expr']),
('calc.py',11,'expr', ['expr','MINUS','expr']),
('calc.py',12,'expr', ['expr','TIMES','expr']),
('calc.py',13,'expr', ['expr','DIVIDE','expr'])
])
</pre>
</blockquote>
<p>
<b><tt>p.pfuncs</tt></b>
<blockquote>
A sorted list of tuples <tt>(line, file, name, doc)</tt> representing all of
the <tt>p_</tt> functions found. <tt>line</tt> and <tt>file</tt> give location
information. <tt>name</tt> is the name of the function. <tt>doc</tt> is the
documentation string. This list is sorted in ascending order by line number.
</blockquote>
<p>
<b><tt>p.files</tt></b>
<blockquote>
A dictionary holding all of the source filenames that were encountered
while collecting parser information. Only the keys of this dictionary have
any meaning.
</blockquote>
<p>
<b><tt>p.error</tt></b>
<blockquote>
An attribute that indicates whether or not any critical errors
occurred in validation. If this is set, it means that some kind
of problem was detected and that no further processing should be
performed.
</blockquote>
<H2><a name="internal_nn9"></a>9. High-level operation</H2>
Using all of the above classes requires some attention to detail. The <tt>yacc()</tt>
function carries out a very specific sequence of operations to create a grammar.
This same sequence should be emulated if you build an alternative PLY interface.
<ol>
<li>A <tt>ParserReflect</tt> object is created and raw grammar specification data is
collected.
<li>A <tt>Grammar</tt> object is created and populated with information
from the specification data.
<li>An <tt>LRGeneratedTable</tt> object is created to run the LALR algorithm over
the <tt>Grammar</tt> object.
<li>Productions in the <tt>LRGeneratedTable</tt> are bound to callables using the <tt>bind_callables()</tt>
method.
<li>An <tt>LRParser</tt> object is created from the information in the
<tt>LRGeneratedTable</tt> object.
</ol>
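<p>
As a guide, here is a hedged sketch of that sequence (<tt>calcspec</tt> is a
hypothetical module defining <tt>tokens</tt>, <tt>precedence</tt>, and the
<tt>p_*()</tt> rule functions in the usual PLY style):

<blockquote>
<pre>
import calcspec
from ply.yacc import ParserReflect, Grammar, LRGeneratedTable, LRParser

# 1. Collect and validate the raw specification data.
pinfo = ParserReflect(calcspec.__dict__)
pinfo.get_all()
if pinfo.validate_all():
    raise SystemExit("invalid parser specification")

# 2. Build and populate the Grammar object.
g = Grammar(pinfo.tokens)
for term, assoc, level in pinfo.preclist:
    g.set_precedence(term, assoc, level)
for name, rules in pinfo.grammar:
    for filename, line, prodname, syms in rules:
        g.add_production(prodname, syms, name, filename, line)
g.set_start(pinfo.start)

# 3. Run the LALR algorithm over the grammar.
lrtab = LRGeneratedTable(g, method='LALR')

# 4. Bind production function names to the actual callables.
lrtab.bind_callables(calcspec.__dict__)

# 5. Create the parsing engine.
parser = LRParser(lrtab, pinfo.error_func)
</pre>
</blockquote>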
</body>
</html>

194
ply/doc/makedoc.py Normal file
View File

@ -0,0 +1,194 @@
#!/usr/local/bin/python
###############################################################################
# Takes a chapter as input and adds internal links and numbering to all
# of the H1, H2, H3, H4 and H5 sections.
#
# Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link
# to. However, if the name is not an autogenerated name from a previous run,
# it will be kept. If it is autogenerated, it might change on subsequent runs
# of this program. Thus if you want to create links to one of the headings,
# then change the heading link name to something that does not look like an
# autogenerated link name.
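#
# For example (numbers illustrative only): on one run a heading in ply.html like
#     <H2>Parsing</H2>
# may become
#     <H2><a name="ply_nn3"></a>3. Parsing</H2>
# and the autogenerated name "ply_nn3" may change on a later run, whereas a
# hand-chosen name such as "parsing-basics" would be preserved.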
###############################################################################
import sys
import re
import string
###############################################################################
# Functions
###############################################################################
# Regexs for <a name="..."></a>
alink = re.compile(r"<a *name *= *\"(.*)\"></a>", re.IGNORECASE)
heading = re.compile(r"(_nn\d)", re.IGNORECASE)
def getheadingname(m):
autogeneratedheading = True
if m.group(1) is not None:
amatch = alink.match(m.group(1))
if amatch:
# A non-autogenerated heading - keep it
headingname = amatch.group(1)
autogeneratedheading = heading.search(headingname) # "_nn<digit>" appears after the filename prefix
if autogeneratedheading:
# The heading name was either non-existent or autogenerated,
# We can create a new heading / change the existing heading
headingname = "%s_nn%d" % (filenamebase, nameindex)
return headingname
###############################################################################
# Main program
###############################################################################
if len(sys.argv) != 2:
print "usage: makedoc.py filename"
sys.exit(1)
filename = sys.argv[1]
filenamebase = string.split(filename,".")[0]
section = 0
subsection = 0
subsubsection = 0
subsubsubsection = 0
nameindex = 0
name = ""
# Regexs for <h1>,... <h5> sections
h1 = re.compile(r".*?<H1>(<a.*a>)*[\d\.\s]*(.*?)</H1>", re.IGNORECASE)
h2 = re.compile(r".*?<H2>(<a.*a>)*[\d\.\s]*(.*?)</H2>", re.IGNORECASE)
h3 = re.compile(r".*?<H3>(<a.*a>)*[\d\.\s]*(.*?)</H3>", re.IGNORECASE)
h4 = re.compile(r".*?<H4>(<a.*a>)*[\d\.\s]*(.*?)</H4>", re.IGNORECASE)
h5 = re.compile(r".*?<H5>(<a.*a>)*[\d\.\s]*(.*?)</H5>", re.IGNORECASE)
data = open(filename).read() # Read data
open(filename+".bak","w").write(data) # Make backup
lines = data.splitlines()
result = [ ] # This is the result of postprocessing the file
index = "<!-- INDEX -->\n<div class=\"sectiontoc\">\n" # index contains the index for adding at the top of the file. Also printed to stdout.
skip = 0
skipspace = 0
for s in lines:
if s == "<!-- INDEX -->":
if not skip:
result.append("@INDEX@")
skip = 1
else:
skip = 0
continue
if skip:
continue
if not s and skipspace:
continue
if skipspace:
result.append("")
result.append("")
skipspace = 0
m = h2.match(s)
if m:
prevheadingtext = m.group(2)
nameindex += 1
section += 1
headingname = getheadingname(m)
result.append("""<H2><a name="%s"></a>%d. %s</H2>""" % (headingname,section, prevheadingtext))
if subsubsubsection:
index += "</ul>\n"
if subsubsection:
index += "</ul>\n"
if subsection:
index += "</ul>\n"
if section == 1:
index += "<ul>\n"
index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext)
subsection = 0
subsubsection = 0
subsubsubsection = 0
skipspace = 1
continue
m = h3.match(s)
if m:
prevheadingtext = m.group(2)
nameindex += 1
subsection += 1
headingname = getheadingname(m)
result.append("""<H3><a name="%s"></a>%d.%d %s</H3>""" % (headingname,section, subsection, prevheadingtext))
if subsubsubsection:
index += "</ul>\n"
if subsubsection:
index += "</ul>\n"
if subsection == 1:
index += "<ul>\n"
index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext)
subsubsection = 0
skipspace = 1
continue
m = h4.match(s)
if m:
prevheadingtext = m.group(2)
nameindex += 1
subsubsection += 1
subsubsubsection = 0
headingname = getheadingname(m)
result.append("""<H4><a name="%s"></a>%d.%d.%d %s</H4>""" % (headingname,section, subsection, subsubsection, prevheadingtext))
if subsubsubsection:
index += "</ul>\n"
if subsubsection == 1:
index += "<ul>\n"
index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext)
skipspace = 1
continue
m = h5.match(s)
if m:
prevheadingtext = m.group(2)
nameindex += 1
subsubsubsection += 1
headingname = getheadingname(m)
result.append("""<H5><a name="%s"></a>%d.%d.%d.%d %s</H5>""" % (headingname,section, subsection, subsubsection, subsubsubsection, prevheadingtext))
if subsubsubsection == 1:
index += "<ul>\n"
index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext)
skipspace = 1
continue
result.append(s)
if subsubsubsection:
index += "</ul>\n"
if subsubsection:
index += "</ul>\n"
if subsection:
index += "</ul>\n"
if section:
index += "</ul>\n"
index += "</div>\n<!-- INDEX -->\n"
data = "\n".join(result)
data = data.replace("@INDEX@",index) + "\n"
# Write the file back out
open(filename,"w").write(data)

3262
ply/doc/ply.html Normal file

File diff suppressed because it is too large Load Diff

79
ply/example/BASIC/README Normal file
View File

@ -0,0 +1,79 @@
Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by
David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html),
I thought that a fully working BASIC interpreter might be an interesting,
if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea,
but in any case, here it is.
In this example, you'll find a rough implementation of 1964 Dartmouth BASIC
as described in the manual at:
http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf
See also:
http://en.wikipedia.org/wiki/Dartmouth_BASIC
This dialect is downright primitive---there are no string variables
and no facilities for interactive input. Moreover, subroutines and functions
are brain-dead even more than they usually are for BASIC. Of course,
the GOTO statement is provided.
Nevertheless, there are a few interesting aspects of this example:
- It illustrates a fully working interpreter including lexing, parsing,
and interpretation of instructions.
- The parser shows how to catch and report various kinds of parsing
errors in a more graceful way.
- The example parses both files (supplied on the command line) and
interactive input entered line by line.
- It shows how you might represent parsed information (see the sketch after this list). In this case,
each BASIC statement is encoded into a Python tuple containing the
statement type and parameters. These tuples are then stored in
a dictionary indexed by program line numbers.
- Even though it's just BASIC, the parser contains more than 80
rules and 150 parsing states. Thus, it's a little more meaty than
the calculator example.
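As a sketch of that representation (tuple layouts inferred from basparse.py),
the two-line program

    10 PRINT "HELLO WORLD"
    20 END

is stored roughly as:

    {10: ('PRINT', [('HELLO WORLD', None)], None),
     20: ('END',)}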
To use the example, run it as follows:
% python basic.py hello.bas
HELLO WORLD
%
or use it interactively:
% python basic.py
[BASIC] 10 PRINT "HELLO WORLD"
[BASIC] 20 END
[BASIC] RUN
HELLO WORLD
[BASIC]
The following files are defined:
basic.py - High level script that controls everything
basiclex.py - BASIC tokenizer
basparse.py - BASIC parser
basinterp.py - BASIC interpreter that runs parsed programs.
In addition, a number of sample BASIC programs (.bas suffix) are
provided. These were taken out of the Dartmouth manual.
Disclaimer: I haven't spent a ton of time testing this and it's likely that
I've skimped here and there on a few finer details (e.g., strictly enforcing
variable naming rules). However, the interpreter seems to be able to run
the examples in the BASIC manual.
Have fun!
-Dave

View File

@ -0,0 +1,71 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
data = open(sys.argv[1]).read()
prog = basparse.parse(data)
if not prog: raise SystemExit
b = basinterp.BasicInterpreter(prog)
try:
b.run()
raise SystemExit
except RuntimeError:
pass
else:
b = basinterp.BasicInterpreter({})
# Interactive mode. This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object. In
# addition, special commands 'NEW','LIST',and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
try:
line = raw_input("[BASIC] ")
except EOFError:
raise SystemExit
if not line: continue
line += "\n"
prog = basparse.parse(line)
if not prog: continue
keys = list(prog)
if keys[0] > 0:
b.add_statements(prog)
else:
stat = prog[keys[0]]
if stat[0] == 'RUN':
try:
b.run()
except RuntimeError:
pass
elif stat[0] == 'LIST':
b.list()
elif stat[0] == 'BLANK':
b.del_line(stat[1])
elif stat[0] == 'NEW':
b.new()

View File

@ -0,0 +1,74 @@
# An implementation of Dartmouth BASIC (1964)
from ply import *
keywords = (
'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP',
'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW',
)
tokens = keywords + (
'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER',
'LPAREN','RPAREN','LT','LE','GT','GE','NE',
'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING',
'ID','NEWLINE'
)
t_ignore = ' \t'
def t_REM(t):
r'REM .*'
return t
def t_ID(t):
r'[A-Z][A-Z0-9]*'
if t.value in keywords:
t.type = t.value
return t
t_EQUALS = r'='
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_POWER = r'\^'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LT = r'<'
t_LE = r'<='
t_GT = r'>'
t_GE = r'>='
t_NE = r'<>'
t_COMMA = r'\,'
t_SEMI = r';'
t_INTEGER = r'\d+'
t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))'
t_STRING = r'\".*?\"'
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
return t
def t_error(t):
print("Illegal character %s" % t.value[0])
t.lexer.skip(1)
lex.lex(debug=0)

View File

@ -0,0 +1,79 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import logging
logging.basicConfig(
level = logging.INFO,
filename = "parselog.txt",
filemode = "w"
)
log = logging.getLogger()
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
data = open(sys.argv[1]).read()
prog = basparse.parse(data,debug=log)
if not prog: raise SystemExit
b = basinterp.BasicInterpreter(prog)
try:
b.run()
raise SystemExit
except RuntimeError:
pass
else:
b = basinterp.BasicInterpreter({})
# Interactive mode. This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object. In
# addition, special commands 'NEW','LIST',and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
try:
line = raw_input("[BASIC] ")
except EOFError:
raise SystemExit
if not line: continue
line += "\n"
prog = basparse.parse(line,debug=log)
if not prog: continue
keys = list(prog)
if keys[0] > 0:
b.add_statements(prog)
else:
stat = prog[keys[0]]
if stat[0] == 'RUN':
try:
b.run()
except RuntimeError:
pass
elif stat[0] == 'LIST':
b.list()
elif stat[0] == 'BLANK':
b.del_line(stat[1])
elif stat[0] == 'NEW':
b.new()

View File

@ -0,0 +1,441 @@
# This file provides the runtime support for running a basic program
# Assumes the program has been parsed using basparse.py
import sys
import math
import random
class BasicInterpreter:
# Initialize the interpreter. prog is a dictionary
# containing (line,statement) mappings
def __init__(self,prog):
self.prog = prog
self.functions = { # Built-in function table
'SIN' : lambda z: math.sin(self.eval(z)),
'COS' : lambda z: math.cos(self.eval(z)),
'TAN' : lambda z: math.tan(self.eval(z)),
'ATN' : lambda z: math.atan(self.eval(z)),
'EXP' : lambda z: math.exp(self.eval(z)),
'ABS' : lambda z: abs(self.eval(z)),
'LOG' : lambda z: math.log(self.eval(z)),
'SQR' : lambda z: math.sqrt(self.eval(z)),
'INT' : lambda z: int(self.eval(z)),
'RND' : lambda z: random.random()
}
# Collect all data statements
def collect_data(self):
self.data = []
for lineno in self.stat:
if self.prog[lineno][0] == 'DATA':
self.data = self.data + self.prog[lineno][1]
self.dc = 0 # Initialize the data counter
# Check for end statements
def check_end(self):
has_end = 0
for lineno in self.stat:
if self.prog[lineno][0] == 'END' and not has_end:
has_end = lineno
if not has_end:
print("NO END INSTRUCTION")
self.error = 1
return
if has_end != lineno:
print("END IS NOT LAST")
self.error = 1
# Check loops
def check_loops(self):
for pc in range(len(self.stat)):
lineno = self.stat[pc]
if self.prog[lineno][0] == 'FOR':
forinst = self.prog[lineno]
loopvar = forinst[1]
for i in range(pc+1,len(self.stat)):
if self.prog[self.stat[i]][0] == 'NEXT':
nextvar = self.prog[self.stat[i]][1]
if nextvar != loopvar: continue
self.loopend[pc] = i
break
else:
print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc])
self.error = 1
# Evaluate an expression
def eval(self,expr):
etype = expr[0]
if etype == 'NUM': return expr[1]
elif etype == 'GROUP': return self.eval(expr[1])
elif etype == 'UNARY':
if expr[1] == '-': return -self.eval(expr[2])
elif etype == 'BINOP':
if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3])
elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3])
elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3])
elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3])
elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3])
elif etype == 'VAR':
var,dim1,dim2 = expr[1]
if not dim1 and not dim2:
if var in self.vars:
return self.vars[var]
else:
print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc]))
raise RuntimeError
# May be a list lookup or a function evaluation
if dim1 and not dim2:
if var in self.functions:
# A function
return self.functions[var](dim1)
else:
# A list evaluation
if var in self.lists:
dim1val = self.eval(dim1)
if dim1val < 1 or dim1val > len(self.lists[var]):
print("LIST INDEX OUT OF BOUNDS AT LINE %s" % self.stat[self.pc])
raise RuntimeError
return self.lists[var][dim1val-1]
if dim1 and dim2:
if var in self.tables:
dim1val = self.eval(dim1)
dim2val = self.eval(dim2)
if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]):
print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % self.stat[self.pc])
raise RuntimeError
return self.tables[var][dim1val-1][dim2val-1]
print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc]))
raise RuntimeError
# Evaluate a relational expression
def releval(self,expr):
etype = expr[1]
lhs = self.eval(expr[2])
rhs = self.eval(expr[3])
if etype == '<':
if lhs < rhs: return 1
else: return 0
elif etype == '<=':
if lhs <= rhs: return 1
else: return 0
elif etype == '>':
if lhs > rhs: return 1
else: return 0
elif etype == '>=':
if lhs >= rhs: return 1
else: return 0
elif etype == '=':
if lhs == rhs: return 1
else: return 0
elif etype == '<>':
if lhs != rhs: return 1
else: return 0
# Assignment
def assign(self,target,value):
var, dim1, dim2 = target
if not dim1 and not dim2:
self.vars[var] = self.eval(value)
elif dim1 and not dim2:
# List assignment
dim1val = self.eval(dim1)
if not var in self.lists:
self.lists[var] = [0]*10
if dim1val > len(self.lists[var]):
print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
raise RuntimeError
self.lists[var][dim1val-1] = self.eval(value)
elif dim1 and dim2:
dim1val = self.eval(dim1)
dim2val = self.eval(dim2)
if not var in self.tables:
temp = [0]*10
v = []
for i in range(10): v.append(temp[:])
self.tables[var] = v
# Variable already exists
if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]):
print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
raise RuntimeError
self.tables[var][dim1val-1][dim2val-1] = self.eval(value)
# Change the current line number
def goto(self,linenum):
if not linenum in self.prog:
print("UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc]))
raise RuntimeError
self.pc = self.stat.index(linenum)
# Run it
def run(self):
self.vars = { } # All variables
self.lists = { } # List variables
self.tables = { } # Tables
self.loops = [ ] # Currently active loops
self.loopend= { } # Mapping saying where loops end
self.gosub = None # Gosub return point (if any)
self.error = 0 # Indicates program error
self.stat = list(self.prog) # Ordered list of all line numbers
self.stat.sort()
self.pc = 0 # Current program counter
# Processing prior to running
self.collect_data() # Collect all of the data statements
self.check_end()
self.check_loops()
if self.error: raise RuntimeError
while 1:
line = self.stat[self.pc]
instr = self.prog[line]
op = instr[0]
# END and STOP statements
if op == 'END' or op == 'STOP':
break # We're done
# GOTO statement
elif op == 'GOTO':
newline = instr[1]
self.goto(newline)
continue
# PRINT statement
elif op == 'PRINT':
plist = instr[1]
out = ""
for label,val in plist:
if out:
out += ' '*(15 - (len(out) % 15))
out += label
if val:
if label: out += " "
eval = self.eval(val)
out += str(eval)
sys.stdout.write(out)
end = instr[2]
if not (end == ',' or end == ';'):
sys.stdout.write("\n")
if end == ',': sys.stdout.write(" "*(15-(len(out) % 15)))
if end == ';': sys.stdout.write(" "*(3-(len(out) % 3)))
# LET statement
elif op == 'LET':
target = instr[1]
value = instr[2]
self.assign(target,value)
# READ statement
elif op == 'READ':
for target in instr[1]:
if self.dc < len(self.data):
value = ('NUM',self.data[self.dc])
self.assign(target,value)
self.dc += 1
else:
# No more data. Program ends
return
elif op == 'IF':
relop = instr[1]
newline = instr[2]
if (self.releval(relop)):
self.goto(newline)
continue
elif op == 'FOR':
loopvar = instr[1]
initval = instr[2]
finval = instr[3]
stepval = instr[4]
# Check to see if this is a new loop
if not self.loops or self.loops[-1][0] != self.pc:
# Looks like a new loop. Make the initial assignment
newvalue = initval
self.assign((loopvar,None,None),initval)
if not stepval: stepval = ('NUM',1)
stepval = self.eval(stepval) # Evaluate step here
self.loops.append((self.pc,stepval))
else:
# It's a repeat of the previous loop
# Update the value of the loop variable according to the step
stepval = ('NUM',self.loops[-1][1])
newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval)
if self.loops[-1][1] < 0: relop = '>='
else: relop = '<='
if not self.releval(('RELOP',relop,newvalue,finval)):
# Loop is done. Jump to the NEXT
self.pc = self.loopend[self.pc]
self.loops.pop()
else:
self.assign((loopvar,None,None),newvalue)
elif op == 'NEXT':
if not self.loops:
print("NEXT WITHOUT FOR AT LINE %s" % line)
return
nextvar = instr[1]
self.pc = self.loops[-1][0]
loopinst = self.prog[self.stat[self.pc]]
forvar = loopinst[1]
if nextvar != forvar:
print("NEXT DOESN'T MATCH FOR AT LINE %s" % line)
return
continue
elif op == 'GOSUB':
newline = instr[1]
if self.gosub:
print("ALREADY IN A SUBROUTINE AT LINE %s" % line)
return
self.gosub = self.stat[self.pc]
self.goto(newline)
continue
elif op == 'RETURN':
if not self.gosub:
print("RETURN WITHOUT A GOSUB AT LINE %s" % line)
return
self.goto(self.gosub)
self.gosub = None
elif op == 'FUNC':
fname = instr[1]
pname = instr[2]
expr = instr[3]
def eval_func(pvalue,name=pname,self=self,expr=expr):
self.assign((name,None,None),pvalue) # use the default-bound name, not run()'s local pname
return self.eval(expr)
self.functions[fname] = eval_func
elif op == 'DIM':
for vname,x,y in instr[1]:
if y == 0:
# Single dimension variable
self.lists[vname] = [0]*x
else:
# Double dimension variable
temp = [0]*y
v = []
for i in range(x):
v.append(temp[:])
self.tables[vname] = v
self.pc += 1
# Utility functions for program listing
def expr_str(self,expr):
etype = expr[0]
if etype == 'NUM': return str(expr[1])
elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1])
elif etype == 'UNARY':
if expr[1] == '-': return "-"+str(expr[2])
elif etype == 'BINOP':
return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3]))
elif etype == 'VAR':
return self.var_str(expr[1])
def relexpr_str(self,expr):
return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3]))
def var_str(self,var):
varname,dim1,dim2 = var
if not dim1 and not dim2: return varname
if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1))
return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2))
# Create a program listing
def list(self):
stat = list(self.prog) # Ordered list of all line numbers
stat.sort()
for line in stat:
instr = self.prog[line]
op = instr[0]
if op in ['END','STOP','RETURN']:
print("%s %s" % (line, op))
continue
elif op == 'REM':
print("%s %s" % (line, instr[1]))
elif op == 'PRINT':
_out = "%s %s " % (line, op)
first = 1
for p in instr[1]:
if not first: _out += ", "
if p[0] and p[1]: _out += '"%s"%s' % (p[0],self.expr_str(p[1]))
elif p[1]: _out += self.expr_str(p[1])
else: _out += '"%s"' % (p[0],)
first = 0
if instr[2]: _out += instr[2]
print(_out)
elif op == 'LET':
print("%s LET %s = %s" % (line,self.var_str(instr[1]),self.expr_str(instr[2])))
elif op == 'READ':
_out = "%s READ " % line
first = 1
for r in instr[1]:
if not first: _out += ","
_out += self.var_str(r)
first = 0
print(_out)
elif op == 'IF':
print("%s IF %s THEN %d" % (line,self.relexpr_str(instr[1]),instr[2]))
elif op == 'GOTO' or op == 'GOSUB':
print("%s %s %s" % (line, op, instr[1]))
elif op == 'FOR':
_out = "%s FOR %s = %s TO %s" % (line,instr[1],self.expr_str(instr[2]),self.expr_str(instr[3]))
if instr[4]: _out += " STEP %s" % (self.expr_str(instr[4]))
print(_out)
elif op == 'NEXT':
print("%s NEXT %s" % (line, instr[1]))
elif op == 'FUNC':
print("%s DEF %s(%s) = %s" % (line,instr[1],instr[2],self.expr_str(instr[3])))
elif op == 'DIM':
_out = "%s DIM " % line
first = 1
for vname,x,y in instr[1]:
if not first: _out += ","
first = 0
if y == 0:
_out += "%s(%d)" % (vname,x)
else:
_out += "%s(%d,%d)" % (vname,x,y)
print(_out)
elif op == 'DATA':
_out = "%s DATA " % line
first = 1
for v in instr[1]:
if not first: _out += ","
first = 0
_out += v
print(_out)
# Erase the current program
def new(self):
self.prog = {}
# Insert statements
def add_statements(self,prog):
for line,stat in prog.items():
self.prog[line] = stat
# Delete a statement
def del_line(self,lineno):
try:
del self.prog[lineno]
except KeyError:
pass

View File

@ -0,0 +1,424 @@
# An implementation of Dartmouth BASIC (1964)
#
from ply import *
import basiclex
tokens = basiclex.tokens
precedence = (
('left', 'PLUS','MINUS'),
('left', 'TIMES','DIVIDE'),
('left', 'POWER'),
('right','UMINUS')
)
#### A BASIC program is a series of statements. We represent the program as a
#### dictionary of tuples indexed by line number.
def p_program(p):
'''program : program statement
| statement'''
if len(p) == 2 and p[1]:
p[0] = { }
line,stat = p[1]
p[0][line] = stat
elif len(p) == 3:
p[0] = p[1]
if not p[0]: p[0] = { }
if p[2]:
line,stat = p[2]
p[0][line] = stat
#### This catch-all rule is used for any catastrophic errors. In this case,
#### we simply return nothing
def p_program_error(p):
'''program : error'''
p[0] = None
p.parser.error = 1
#### Format of all BASIC statements.
def p_statement(p):
'''statement : INTEGER command NEWLINE'''
if isinstance(p[2],str):
print("%s %s %s" % (p[2],"AT LINE", p[1]))
p[0] = None
p.parser.error = 1
else:
lineno = int(p[1])
p[0] = (lineno,p[2])
#### Interactive statements.
def p_statement_interactive(p):
'''statement : RUN NEWLINE
| LIST NEWLINE
| NEW NEWLINE'''
p[0] = (0, (p[1],0))
#### Blank line number
def p_statement_blank(p):
'''statement : INTEGER NEWLINE'''
p[0] = (0,('BLANK',int(p[1])))
#### Error handling for malformed statements
def p_statement_bad(p):
'''statement : INTEGER error NEWLINE'''
print("MALFORMED STATEMENT AT LINE %s" % p[1])
p[0] = None
p.parser.error = 1
#### Blank line
def p_statement_newline(p):
'''statement : NEWLINE'''
p[0] = None
#### LET statement
def p_command_let(p):
'''command : LET variable EQUALS expr'''
p[0] = ('LET',p[2],p[4])
def p_command_let_bad(p):
'''command : LET variable EQUALS error'''
p[0] = "BAD EXPRESSION IN LET"
#### READ statement
def p_command_read(p):
'''command : READ varlist'''
p[0] = ('READ',p[2])
def p_command_read_bad(p):
'''command : READ error'''
p[0] = "MALFORMED VARIABLE LIST IN READ"
#### DATA statement
def p_command_data(p):
'''command : DATA numlist'''
p[0] = ('DATA',p[2])
def p_command_data_bad(p):
'''command : DATA error'''
p[0] = "MALFORMED NUMBER LIST IN DATA"
#### PRINT statement
def p_command_print(p):
'''command : PRINT plist optend'''
p[0] = ('PRINT',p[2],p[3])
def p_command_print_bad(p):
'''command : PRINT error'''
p[0] = "MALFORMED PRINT STATEMENT"
#### Optional ending on PRINT. Either a comma (,) or semicolon (;)
def p_optend(p):
'''optend : COMMA
| SEMI
|'''
if len(p) == 2:
p[0] = p[1]
else:
p[0] = None
#### PRINT statement with no arguments
def p_command_print_empty(p):
'''command : PRINT'''
p[0] = ('PRINT',[],None)
#### GOTO statement
def p_command_goto(p):
'''command : GOTO INTEGER'''
p[0] = ('GOTO',int(p[2]))
def p_command_goto_bad(p):
'''command : GOTO error'''
p[0] = "INVALID LINE NUMBER IN GOTO"
#### IF-THEN statement
def p_command_if(p):
'''command : IF relexpr THEN INTEGER'''
p[0] = ('IF',p[2],int(p[4]))
def p_command_if_bad(p):
'''command : IF error THEN INTEGER'''
p[0] = "BAD RELATIONAL EXPRESSION"
def p_command_if_bad2(p):
'''command : IF relexpr THEN error'''
p[0] = "INVALID LINE NUMBER IN THEN"
#### FOR statement
def p_command_for(p):
'''command : FOR ID EQUALS expr TO expr optstep'''
p[0] = ('FOR',p[2],p[4],p[6],p[7])
def p_command_for_bad_initial(p):
'''command : FOR ID EQUALS error TO expr optstep'''
p[0] = "BAD INITIAL VALUE IN FOR STATEMENT"
def p_command_for_bad_final(p):
'''command : FOR ID EQUALS expr TO error optstep'''
p[0] = "BAD FINAL VALUE IN FOR STATEMENT"
def p_command_for_bad_step(p):
'''command : FOR ID EQUALS expr TO expr STEP error'''
p[0] = "MALFORMED STEP IN FOR STATEMENT"
#### Optional STEP qualifier on FOR statement
def p_optstep(p):
'''optstep : STEP expr
| empty'''
if len(p) == 3:
p[0] = p[2]
else:
p[0] = None
#### NEXT statement
def p_command_next(p):
'''command : NEXT ID'''
p[0] = ('NEXT',p[2])
def p_command_next_bad(p):
'''command : NEXT error'''
p[0] = "MALFORMED NEXT"
#### END statement
def p_command_end(p):
'''command : END'''
p[0] = ('END',)
#### REM statement
def p_command_rem(p):
'''command : REM'''
p[0] = ('REM',p[1])
#### STOP statement
def p_command_stop(p):
'''command : STOP'''
p[0] = ('STOP',)
#### DEF statement
def p_command_def(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS expr'''
p[0] = ('FUNC',p[2],p[4],p[7])
def p_command_def_bad_rhs(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS error'''
p[0] = "BAD EXPRESSION IN DEF STATEMENT"
def p_command_def_bad_arg(p):
'''command : DEF ID LPAREN error RPAREN EQUALS expr'''
p[0] = "BAD ARGUMENT IN DEF STATEMENT"
#### GOSUB statement
def p_command_gosub(p):
'''command : GOSUB INTEGER'''
p[0] = ('GOSUB',int(p[2]))
def p_command_gosub_bad(p):
'''command : GOSUB error'''
p[0] = "INVALID LINE NUMBER IN GOSUB"
#### RETURN statement
def p_command_return(p):
'''command : RETURN'''
p[0] = ('RETURN',)
#### DIM statement
def p_command_dim(p):
'''command : DIM dimlist'''
p[0] = ('DIM',p[2])
def p_command_dim_bad(p):
'''command : DIM error'''
p[0] = "MALFORMED VARIABLE LIST IN DIM"
#### List of variables supplied to DIM statement
def p_dimlist(p):
'''dimlist : dimlist COMMA dimitem
| dimitem'''
if len(p) == 4:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
#### DIM items
def p_dimitem_single(p):
'''dimitem : ID LPAREN INTEGER RPAREN'''
p[0] = (p[1],eval(p[3]),0)
def p_dimitem_double(p):
'''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN'''
p[0] = (p[1],eval(p[3]),eval(p[5]))
#### Arithmetic expressions
def p_expr_binary(p):
'''expr : expr PLUS expr
| expr MINUS expr
| expr TIMES expr
| expr DIVIDE expr
| expr POWER expr'''
p[0] = ('BINOP',p[2],p[1],p[3])
def p_expr_number(p):
'''expr : INTEGER
| FLOAT'''
p[0] = ('NUM',eval(p[1]))
def p_expr_variable(p):
'''expr : variable'''
p[0] = ('VAR',p[1])
def p_expr_group(p):
'''expr : LPAREN expr RPAREN'''
p[0] = ('GROUP',p[2])
def p_expr_unary(p):
'''expr : MINUS expr %prec UMINUS'''
p[0] = ('UNARY','-',p[2])
#### Relational expressions
def p_relexpr(p):
'''relexpr : expr LT expr
| expr LE expr
| expr GT expr
| expr GE expr
| expr EQUALS expr
| expr NE expr'''
p[0] = ('RELOP',p[2],p[1],p[3])
#### Variables
def p_variable(p):
'''variable : ID
| ID LPAREN expr RPAREN
| ID LPAREN expr COMMA expr RPAREN'''
if len(p) == 2:
p[0] = (p[1],None,None)
elif len(p) == 5:
p[0] = (p[1],p[3],None)
else:
p[0] = (p[1],p[3],p[5])
#### Builds a list of variable targets as a Python list
def p_varlist(p):
'''varlist : varlist COMMA variable
| variable'''
if len(p) > 2:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
#### Builds a list of numbers as a Python list
def p_numlist(p):
'''numlist : numlist COMMA number
| number'''
if len(p) > 2:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
#### A number. May be an integer or a float
def p_number(p):
'''number : INTEGER
| FLOAT'''
p[0] = eval(p[1])
#### A signed number.
def p_number_signed(p):
'''number : MINUS INTEGER
| MINUS FLOAT'''
p[0] = eval("-"+p[2])
#### List of targets for a print statement
#### Returns a list of tuples (label,expr)
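#### e.g. (an illustrative trace, following p_item_string_expr below):
####     PRINT "A="A   ->   plist == [('A=', ('VAR', ('A', None, None)))]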
def p_plist(p):
'''plist : plist COMMA pitem
| pitem'''
if len(p) > 3:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
def p_item_string(p):
'''pitem : STRING'''
p[0] = (p[1][1:-1],None)
def p_item_string_expr(p):
'''pitem : STRING expr'''
p[0] = (p[1][1:-1],p[2])
def p_item_expr(p):
'''pitem : expr'''
p[0] = ("",p[1])
#### Empty
def p_empty(p):
'''empty : '''
#### Catastrophic error handler
def p_error(p):
if not p:
print("SYNTAX ERROR AT EOF")
bparser = yacc.yacc()
def parse(data,debug=0):
bparser.error = 0
p = bparser.parse(data,debug=debug)
if bparser.error: return None
return p

14
ply/example/BASIC/dim.bas Normal file
View File

@ -0,0 +1,14 @@
5 DIM A(50,15)
10 FOR I = 1 TO 50
20 FOR J = 1 TO 15
30 LET A(I,J) = I + J
35 REM PRINT I,J, A(I,J)
40 NEXT J
50 NEXT I
100 FOR I = 1 TO 50
110 FOR J = 1 TO 15
120 PRINT A(I,J),
130 NEXT J
140 PRINT
150 NEXT I
999 END

View File

@ -0,0 +1,5 @@
10 DEF FDX(X) = 2*X
20 FOR I = 0 TO 100
30 PRINT FDX(I)
40 NEXT I
50 END

22
ply/example/BASIC/gcd.bas Normal file
View File

@ -0,0 +1,22 @@
10 PRINT "A","B","C","GCD"
20 READ A,B,C
30 LET X = A
40 LET Y = B
50 GOSUB 200
60 LET X = G
70 LET Y = C
80 GOSUB 200
90 PRINT A, B, C, G
100 GOTO 20
110 DATA 60, 90, 120
120 DATA 38456, 64872, 98765
130 DATA 32, 384, 72
200 LET Q = INT(X/Y)
210 LET R = X - Q*Y
220 IF R = 0 THEN 300
230 LET X = Y
240 LET Y = R
250 GOTO 200
300 LET G = Y
310 RETURN
999 END

View File

@ -0,0 +1,13 @@
100 LET X = 3
110 GOSUB 400
120 PRINT U, V, W
200 LET X = 5
210 GOSUB 400
220 LET Z = U + 2*V + 3*W
230 PRINT Z
240 GOTO 999
400 LET U = X*X
410 LET V = X*X*X
420 LET W = X*X*X*X + X*X*X + X*X + X
430 RETURN
999 END

View File

@ -0,0 +1,4 @@
5 REM HELLO WORLD PROGRAM
10 PRINT "HELLO WORLD"
99 END

View File

@ -0,0 +1,17 @@
1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS
2 REM ::: A1*X1 + A2*X2 = B1
3 REM ::: A3*X1 + A4*X2 = B2
4 REM --------------------------------------
10 READ A1, A2, A3, A4
15 LET D = A1 * A4 - A3 * A2
20 IF D = 0 THEN 65
30 READ B1, B2
37 LET X1 = (B1*A4 - B2*A2) / D
42 LET X2 = (A1*B2 - A3*B1) / D
55 PRINT X1, X2
60 GOTO 30
65 PRINT "NO UNIQUE SOLUTION"
70 DATA 1, 2, 4
80 DATA 2, -7, 5
85 DATA 1, 3, 4, -7
90 END

View File

@ -0,0 +1,12 @@
5 PRINT "X VALUE", "SINE", "RESOLUTION"
10 READ D
20 LET M = -1
30 FOR X = 0 TO 3 STEP D
40 IF SIN(X) <= M THEN 80
50 LET X0 = X
60 LET M = SIN(X)
80 NEXT X
85 PRINT X0, M, D
90 GOTO 10
100 DATA .1, .01, .001
110 END

View File

@ -0,0 +1,13 @@
5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS"
6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS"
7 PRINT "N FROM 1 THROUGH 7"
8 PRINT
10 FOR N = 1 TO 7
15 PRINT "N = "N
20 FOR I = 1 TO N
30 PRINT I^N,
40 NEXT I
50 PRINT
60 PRINT
70 NEXT N
80 END

View File

@ -0,0 +1,4 @@
10 FOR I = 1 TO 20
20 PRINT INT(10*RND(0))
30 NEXT I
40 END

View File

@ -0,0 +1,20 @@
10 FOR I = 1 TO 3
20 READ P(I)
30 NEXT I
40 FOR I = 1 TO 3
50 FOR J = 1 TO 5
60 READ S(I,J)
70 NEXT J
80 NEXT I
90 FOR J = 1 TO 5
100 LET S = 0
110 FOR I = 1 TO 3
120 LET S = S + P(I) * S(I,J)
130 NEXT I
140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S
150 NEXT J
200 DATA 1.25, 4.30, 2.50
210 DATA 40, 20, 37, 29, 42
220 DATA 10, 16, 3, 21, 8
230 DATA 35, 47, 29, 16, 33
300 END

View File

@ -0,0 +1,18 @@
1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD
2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE
3 REM :: SEARS TOWER.
4 REM :: S = HEIGHT OF TOWER (METERS)
5 REM :: T = THICKNESS OF PAPER (MILLIMETERS)
10 LET S = 442
20 LET T = 0.1
30 REM CONVERT T TO METERS
40 LET T = T * .001
50 LET F = 1
60 LET H = T
100 IF H > S THEN 200
120 LET H = 2 * H
125 LET F = F + 1
130 GOTO 100
200 PRINT "NUMBER OF FOLDS ="F
220 PRINT "FINAL HEIGHT ="H
999 END

View File

@ -0,0 +1,5 @@
10 LET X = 0
20 LET X = X + 1
30 PRINT X, SQR(X)
40 IF X < 100 THEN 20
50 END

View File

@ -0,0 +1,4 @@
10 FOR X = 1 TO 100
20 PRINT X, SQR(X)
30 NEXT X
40 END

View File

@ -0,0 +1,709 @@
# GardenSnake - a parser generator demonstration program
#
# This implements a modified version of a subset of Python:
# - only 'def', 'return' and 'if' statements
# - 'if' only has 'then' clause (no elif nor else)
# - single-quoted strings only, content in raw format
# - numbers are decimal.Decimal instances (not integers or floats)
# - no print statement; use the built-in 'print' function
# - only < > == + - / * implemented (and unary + -)
# - assignment and tuple assignment work
# - no generators of any sort
# - no ... well, no quite a lot
# Why? I'm thinking about a new indentation-based configuration
# language for a project and wanted to figure out how to do it. Once
# I got that working I needed a way to test it out. My original AST
# was dumb so I decided to target Python's AST and compile it into
# Python code. Plus, it's pretty cool that it only took a day or so
# from sitting down with Ply to having working code.
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
# This work is hereby released into the Public Domain. To view a copy of
# the public domain dedication, visit
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
# California, 94105, USA.
#
# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa
# Changelog:
# 30 August - added link to CC license; removed the "swapcase" encoding
# Modifications for inclusion in PLY distribution
import sys
sys.path.insert(0,"../..")
from ply import *
##### Lexer ######
#import lex
import decimal
tokens = (
'DEF',
'IF',
'NAME',
'NUMBER', # Python decimals
'STRING', # single quoted strings only; syntax of raw strings
'LPAR',
'RPAR',
'COLON',
'EQ',
'ASSIGN',
'LT',
'GT',
'PLUS',
'MINUS',
'MULT',
'DIV',
'RETURN',
'WS',
'NEWLINE',
'COMMA',
'SEMICOLON',
'INDENT',
'DEDENT',
'ENDMARKER',
)
#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
def t_NUMBER(t):
r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
t.value = decimal.Decimal(t.value)
return t
def t_STRING(t):
r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
return t
t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'
# Ply nicely documented how to do this.
RESERVED = {
"def": "DEF",
"if": "IF",
"return": "RETURN",
}
def t_NAME(t):
r'[a-zA-Z_][a-zA-Z0-9_]*'
t.type = RESERVED.get(t.value, "NAME")
return t
# Putting this before t_WS lets it consume lines with only comments in
# them so the latter code never sees the WS part. Not consuming the
# newline. Needed for "if 1: #comment"
def t_comment(t):
r"[ ]*\043[^\n]*" # \043 is '#'
pass
# Whitespace
def t_WS(t):
r' [ ]+ '
if t.lexer.at_line_start and t.lexer.paren_count == 0:
return t
# Don't generate newline tokens when inside of parenthesis, eg
# a = (1,
# 2, 3)
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
t.type = "NEWLINE"
if t.lexer.paren_count == 0:
return t
def t_LPAR(t):
r'\('
t.lexer.paren_count += 1
return t
def t_RPAR(t):
r'\)'
# check for underflow? should be the job of the parser
t.lexer.paren_count -= 1
return t
def t_error(t):
raise SyntaxError("Unknown symbol %r" % (t.value[0],))
print "Skipping", repr(t.value[0])
t.lexer.skip(1)
## I implemented INDENT / DEDENT generation as a post-processing filter
# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.
# I have three filters. One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code. The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE token on a line. It flags the check to
# see if the new line has changed indentation level.
# Python's syntax has three INDENT states
# 0) no colon hence no need to indent
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2
# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
lexer.at_line_start = at_line_start = True
indent = NO_INDENT
saw_colon = False
for token in tokens:
token.at_line_start = at_line_start
if token.type == "COLON":
at_line_start = False
indent = MAY_INDENT
token.must_indent = False
elif token.type == "NEWLINE":
at_line_start = True
if indent == MAY_INDENT:
indent = MUST_INDENT
token.must_indent = False
elif token.type == "WS":
assert token.at_line_start == True
at_line_start = True
token.must_indent = False
else:
# A real token; only indent after COLON NEWLINE
if indent == MUST_INDENT:
token.must_indent = True
else:
token.must_indent = False
at_line_start = False
indent = NO_INDENT
yield token
lexer.at_line_start = at_line_start
def _new_token(type, lineno):
tok = lex.LexToken()
tok.type = type
tok.value = None
tok.lineno = lineno
return tok
# Synthesize a DEDENT tag
def DEDENT(lineno):
return _new_token("DEDENT", lineno)
# Synthesize an INDENT tag
def INDENT(lineno):
return _new_token("INDENT", lineno)
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
# A stack of indentation levels; will never pop item 0
levels = [0]
token = None
depth = 0
prev_was_ws = False
for token in tokens:
## if 1:
## print "Process", token,
## if token.at_line_start:
## print "at_line_start",
## if token.must_indent:
## print "must_indent",
## print
# WS only occurs at the start of the line
# There may be WS followed by NEWLINE so
# only track the depth here. Don't indent/dedent
# until there's something real.
if token.type == "WS":
assert depth == 0
depth = len(token.value)
prev_was_ws = True
# WS tokens are never passed to the parser
continue
if token.type == "NEWLINE":
depth = 0
if prev_was_ws or token.at_line_start:
# ignore blank lines
continue
# pass the other cases on through
yield token
continue
# then it must be a real token (not WS, not NEWLINE)
# which can affect the indentation level
prev_was_ws = False
if token.must_indent:
# The current depth must be larger than the previous level
if not (depth > levels[-1]):
raise IndentationError("expected an indented block")
levels.append(depth)
yield INDENT(token.lineno)
elif token.at_line_start:
# Must be on the same level or one of the previous levels
if depth == levels[-1]:
# At the same level
pass
elif depth > levels[-1]:
raise IndentationError("indentation increase but not in new block")
else:
# Back up; but only if it matches a previous level
try:
i = levels.index(depth)
except ValueError:
raise IndentationError("inconsistent indentation")
for _ in range(i+1, len(levels)):
yield DEDENT(token.lineno)
levels.pop()
yield token
### Finished processing ###
# Must dedent any remaining levels
if len(levels) > 1:
assert token is not None
for _ in range(1, len(levels)):
yield DEDENT(token.lineno)
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker = True):
token = None
tokens = iter(lexer.token, None)
tokens = track_tokens_filter(lexer, tokens)
for token in indentation_filter(tokens):
yield token
if add_endmarker:
lineno = 1
if token is not None:
lineno = token.lineno
yield _new_token("ENDMARKER", lineno)
# Combine Ply and my filters into a new lexer
class IndentLexer(object):
def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
self.token_stream = None
def input(self, s, add_endmarker=True):
self.lexer.paren_count = 0
self.lexer.input(s)
self.token_stream = filter(self.lexer, add_endmarker)
def token(self):
try:
return self.token_stream.next()
except StopIteration:
return None
########## Parser (tokens -> AST) ######
# also part of Ply
#import yacc
# I use the Python AST
from compiler import ast
# Helper function
def Assign(left, right):
names = []
if isinstance(left, ast.Name):
# Single assignment on left
return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
elif isinstance(left, ast.Tuple):
# List of things - make sure they are Name nodes
names = []
for child in left.getChildren():
if not isinstance(child, ast.Name):
raise SyntaxError("that assignment not supported")
names.append(child.name)
ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
return ast.Assign([ast.AssTuple(ass_list)], right)
else:
raise SyntaxError("Can't do that yet")
# The grammar comments come from Python's Grammar/Grammar file
## NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
"""file_input_end : file_input ENDMARKER"""
p[0] = ast.Stmt(p[1])
def p_file_input(p):
"""file_input : file_input NEWLINE
| file_input stmt
| NEWLINE
| stmt"""
if isinstance(p[len(p)-1], basestring):
if len(p) == 3:
p[0] = p[1]
else:
p[0] = [] # p == 2 --> only a blank line
else:
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
"funcdef : DEF NAME parameters COLON suite"
p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
# parameters: '(' [varargslist] ')'
def p_parameters(p):
"""parameters : LPAR RPAR
| LPAR varargslist RPAR"""
if len(p) == 3:
p[0] = []
else:
p[0] = p[2]
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
"""varargslist : varargslist COMMA NAME
| NAME"""
if len(p) == 4:
p[0] = p[1] + [p[3]] # append the new NAME to the list
else:
p[0] = [p[1]]
# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
"""stmt : simple_stmt"""
# simple_stmt is a list
p[0] = p[1]
def p_stmt_compound(p):
"""stmt : compound_stmt"""
p[0] = [p[1]]
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
"""simple_stmt : small_stmts NEWLINE
| small_stmts SEMICOLON NEWLINE"""
p[0] = p[1]
def p_small_stmts(p):
"""small_stmts : small_stmts SEMICOLON small_stmt
| small_stmt"""
if len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [p[1]]
# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
# import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
"""small_stmt : flow_stmt
| expr_stmt"""
p[0] = p[1]
# expr_stmt: testlist (augassign (yield_expr|testlist) |
# ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
# '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
"""expr_stmt : testlist ASSIGN testlist
| testlist """
if len(p) == 2:
# a list of expressions
p[0] = ast.Discard(p[1])
else:
p[0] = Assign(p[1], p[3])
def p_flow_stmt(p):
"flow_stmt : return_stmt"
p[0] = p[1]
# return_stmt: 'return' [testlist]
def p_return_stmt(p):
"return_stmt : RETURN testlist"
p[0] = ast.Return(p[2])
def p_compound_stmt(p):
"""compound_stmt : if_stmt
| funcdef"""
p[0] = p[1]
def p_if_stmt(p):
'if_stmt : IF test COLON suite'
p[0] = ast.If([(p[2], p[4])], None)
def p_suite(p):
"""suite : simple_stmt
| NEWLINE INDENT stmts DEDENT"""
if len(p) == 2:
p[0] = ast.Stmt(p[1])
else:
p[0] = ast.Stmt(p[3])
def p_stmts(p):
"""stmts : stmts stmt
| stmt"""
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
## Not using Python's approach because Ply supports precedence
# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
def make_lt_compare((left, right)):
return ast.Compare(left, [('<', right),])
def make_gt_compare((left, right)):
return ast.Compare(left, [('>', right),])
def make_eq_compare((left, right)):
return ast.Compare(left, [('==', right),])
binary_ops = {
"+": ast.Add,
"-": ast.Sub,
"*": ast.Mul,
"/": ast.Div,
"<": make_lt_compare,
">": make_gt_compare,
"==": make_eq_compare,
}
unary_ops = {
"+": ast.UnaryAdd,
"-": ast.UnarySub,
}
precedence = (
("left", "EQ", "GT", "LT"),
("left", "PLUS", "MINUS"),
("left", "MULT", "DIV"),
)
def p_comparison(p):
"""comparison : comparison PLUS comparison
| comparison MINUS comparison
| comparison MULT comparison
| comparison DIV comparison
| comparison LT comparison
| comparison EQ comparison
| comparison GT comparison
| PLUS comparison
| MINUS comparison
| power"""
if len(p) == 4:
p[0] = binary_ops[p[2]]((p[1], p[3]))
elif len(p) == 3:
p[0] = unary_ops[p[1]](p[2])
else:
p[0] = p[1]
# power: atom trailer* ['**' factor]
# trailers enable function calls. I only allow one level of calls,
# so this is 'trailer'
def p_power(p):
"""power : atom
| atom trailer"""
if len(p) == 2:
p[0] = p[1]
else:
if p[2][0] == "CALL":
p[0] = ast.CallFunc(p[1], p[2][1], None, None)
else:
raise AssertionError("not implemented")
def p_atom_name(p):
"""atom : NAME"""
p[0] = ast.Name(p[1])
def p_atom_number(p):
"""atom : NUMBER
| STRING"""
p[0] = ast.Const(p[1])
def p_atom_tuple(p):
"""atom : LPAR testlist RPAR"""
p[0] = p[2]
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
"trailer : LPAR arglist RPAR"
p[0] = ("CALL", p[2])
# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
"""testlist : testlist_multi COMMA
| testlist_multi """
if len(p) == 2:
p[0] = p[1]
else:
# May need to promote singleton to tuple
if isinstance(p[1], list):
p[0] = p[1]
else:
p[0] = [p[1]]
# Convert into a tuple?
if isinstance(p[0], list):
p[0] = ast.Tuple(p[0])
def p_testlist_multi(p):
"""testlist_multi : testlist_multi COMMA test
| test"""
if len(p) == 2:
# singleton
p[0] = p[1]
else:
if isinstance(p[1], list):
p[0] = p[1] + [p[3]]
else:
# singleton -> tuple
p[0] = [p[1], p[3]]
# test: or_test ['if' or_test 'else' test] | lambdef
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
"test : comparison"
p[0] = p[1]
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
"""arglist : arglist COMMA argument
| argument"""
if len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [p[1]]
# argument: test [gen_for] | test '=' test # Really [keyword '='] test
def p_argument(p):
"argument : test"
p[0] = p[1]
def p_error(p):
#print "Error!", repr(p)
raise SyntaxError(p)
class GardenSnakeParser(object):
def __init__(self, lexer = None):
if lexer is None:
lexer = IndentLexer()
self.lexer = lexer
self.parser = yacc.yacc(start="file_input_end")
def parse(self, code):
self.lexer.input(code)
result = self.parser.parse(lexer = self.lexer)
return ast.Module(None, result)
###### Code generation ######
from compiler import misc, syntax, pycodegen
class GardenSnakeCompiler(object):
def __init__(self):
self.parser = GardenSnakeParser()
def compile(self, code, filename="<string>"):
tree = self.parser.parse(code)
#print tree
misc.set_filename(filename, tree)
syntax.check(tree)
gen = pycodegen.ModuleCodeGenerator(tree)
code = gen.getCode()
return code
####### Test code #######
compile = GardenSnakeCompiler().compile
code = r"""
print('LET\'S TRY THIS \\OUT')
#Comment here
def x(a):
print('called with',a)
if a == 1:
return 2
if a*2 > 10: return 999 / 4
# Another comment here
return a+2*3
ints = (1, 2,
3, 4,
5)
print('multiline-expression', ints)
t = 4+1/3*2+6*(9-5+1)
print('precedence test; should be 34+2/3:', t, t==(34+2/3))
print('numbers', 1,2,3,4,5)
if 1:
8
a=9
print(x(a))
print(x(1))
print(x(2))
print(x(8),'3')
print('this is decimal', 1/5)
print('BIG DECIMAL', 1.234567891234567e12345)
"""
# Set up the GardenSnake run-time environment
def print_(*args):
print "-->", " ".join(map(str,args))
globals()["print"] = print_
compiled_code = compile(code)
exec compiled_code in globals()
print "Done"

View File

@ -0,0 +1,5 @@
This example is Andrew Dalke's GardenSnake language. It shows how to process an
indentation-based language like Python. Further details can be found here:
http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html

10
ply/example/README Normal file
View File

@ -0,0 +1,10 @@
Simple examples:
calc - Simple calculator
classcalc - Simple calculator defined as a class
Complex examples:
ansic - ANSI C grammar from K&R
BASIC - A small BASIC interpreter
GardenSnake - A simple python-like language
yply - Converts Unix yacc files to PLY programs.

2
ply/example/ansic/README Normal file
View File

@ -0,0 +1,2 @@
This example is incomplete. It was intended to specify a full ANSI C parser;
this is part of it.

164
ply/example/ansic/clex.py Normal file
View File

@ -0,0 +1,164 @@
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
# ----------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
import ply.lex as lex
# Reserved words
reserved = (
'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
)
tokens = reserved + (
# Literals (identifier, integer constant, float constant, string constant, char const)
'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
# Increment/decrement (++,--)
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Conditional operator (?)
'CONDOP',
# Delimiters ( ) [ ] { } , . ; :
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
# Ellipsis (...)
'ELLIPSIS',
)
# Completely ignored characters
t_ignore = ' \t\x0c'
# Newlines
def t_NEWLINE(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^=' # escape ^ so it is not treated as an anchor
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers and reserved words
reserved_map = { }
for r in reserved:
reserved_map[r.lower()] = r
def t_ID(t):
r'[A-Za-z_][\w_]*'
t.type = reserved_map.get(t.value,"ID")
return t
# Integer literal
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comments
def t_comment(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
# Preprocessor directive (ignored)
def t_preprocessor(t):
r'\#(.)*?\n'
t.lexer.lineno += 1
def t_error(t):
print("Illegal character %s" % repr(t.value[0]))
t.lexer.skip(1)
lexer = lex.lex(optimize=1)
if __name__ == "__main__":
lex.runmain(lexer)

863
ply/example/ansic/cparse.py Normal file
View File

@ -0,0 +1,863 @@
# -----------------------------------------------------------------------------
# cparse.py
#
# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed.
# -----------------------------------------------------------------------------
import sys
import clex
import ply.yacc as yacc
# Get the token map
tokens = clex.tokens
# translation-unit:
def p_translation_unit_1(t):
'translation_unit : external_declaration'
pass
def p_translation_unit_2(t):
'translation_unit : translation_unit external_declaration'
pass
# external-declaration:
def p_external_declaration_1(t):
'external_declaration : function_definition'
pass
def p_external_declaration_2(t):
'external_declaration : declaration'
pass
# function-definition:
def p_function_definition_1(t):
'function_definition : declaration_specifiers declarator declaration_list compound_statement'
pass
def p_function_definition_2(t):
'function_definition : declarator declaration_list compound_statement'
pass
def p_function_definition_3(t):
'function_definition : declarator compound_statement'
pass
def p_function_definition_4(t):
'function_definition : declaration_specifiers declarator compound_statement'
pass
# declaration:
def p_declaration_1(t):
'declaration : declaration_specifiers init_declarator_list SEMI'
pass
def p_declaration_2(t):
'declaration : declaration_specifiers SEMI'
pass
# declaration-list:
def p_declaration_list_1(t):
'declaration_list : declaration'
pass
def p_declaration_list_2(t):
'declaration_list : declaration_list declaration '
pass
# declaration-specifiers
def p_declaration_specifiers_1(t):
'declaration_specifiers : storage_class_specifier declaration_specifiers'
pass
def p_declaration_specifiers_2(t):
'declaration_specifiers : type_specifier declaration_specifiers'
pass
def p_declaration_specifiers_3(t):
'declaration_specifiers : type_qualifier declaration_specifiers'
pass
def p_declaration_specifiers_4(t):
'declaration_specifiers : storage_class_specifier'
pass
def p_declaration_specifiers_5(t):
'declaration_specifiers : type_specifier'
pass
def p_declaration_specifiers_6(t):
'declaration_specifiers : type_qualifier'
pass
# storage-class-specifier
def p_storage_class_specifier(t):
'''storage_class_specifier : AUTO
| REGISTER
| STATIC
| EXTERN
| TYPEDEF
'''
pass
# type-specifier:
def p_type_specifier(t):
'''type_specifier : VOID
| CHAR
| SHORT
| INT
| LONG
| FLOAT
| DOUBLE
| SIGNED
| UNSIGNED
| struct_or_union_specifier
| enum_specifier
| TYPEID
'''
pass
# type-qualifier:
def p_type_qualifier(t):
'''type_qualifier : CONST
| VOLATILE'''
pass
# struct-or-union-specifier
def p_struct_or_union_specifier_1(t):
'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE'
pass
def p_struct_or_union_specifier_2(t):
'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE'
pass
def p_struct_or_union_specifier_3(t):
'struct_or_union_specifier : struct_or_union ID'
pass
# struct-or-union:
def p_struct_or_union(t):
'''struct_or_union : STRUCT
| UNION
'''
pass
# struct-declaration-list:
def p_struct_declaration_list_1(t):
'struct_declaration_list : struct_declaration'
pass
def p_struct_declaration_list_2(t):
'struct_declaration_list : struct_declaration_list struct_declaration'
pass
# init-declarator-list:
def p_init_declarator_list_1(t):
'init_declarator_list : init_declarator'
pass
def p_init_declarator_list_2(t):
'init_declarator_list : init_declarator_list COMMA init_declarator'
pass
# init-declarator
def p_init_declarator_1(t):
'init_declarator : declarator'
pass
def p_init_declarator_2(t):
'init_declarator : declarator EQUALS initializer'
pass
# struct-declaration:
def p_struct_declaration(t):
'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI'
pass
# specifier-qualifier-list:
def p_specifier_qualifier_list_1(t):
'specifier_qualifier_list : type_specifier specifier_qualifier_list'
pass
def p_specifier_qualifier_list_2(t):
'specifier_qualifier_list : type_specifier'
pass
def p_specifier_qualifier_list_3(t):
'specifier_qualifier_list : type_qualifier specifier_qualifier_list'
pass
def p_specifier_qualifier_list_4(t):
'specifier_qualifier_list : type_qualifier'
pass
# struct-declarator-list:
def p_struct_declarator_list_1(t):
'struct_declarator_list : struct_declarator'
pass
def p_struct_declarator_list_2(t):
'struct_declarator_list : struct_declarator_list COMMA struct_declarator'
pass
# struct-declarator:
def p_struct_declarator_1(t):
'struct_declarator : declarator'
pass
def p_struct_declarator_2(t):
'struct_declarator : declarator COLON constant_expression'
pass
def p_struct_declarator_3(t):
'struct_declarator : COLON constant_expression'
pass
# enum-specifier:
def p_enum_specifier_1(t):
'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE'
pass
def p_enum_specifier_2(t):
'enum_specifier : ENUM LBRACE enumerator_list RBRACE'
pass
def p_enum_specifier_3(t):
'enum_specifier : ENUM ID'
pass
# enumerator_list:
def p_enumerator_list_1(t):
'enumerator_list : enumerator'
pass
def p_enumerator_list_2(t):
'enumerator_list : enumerator_list COMMA enumerator'
pass
# enumerator:
def p_enumerator_1(t):
'enumerator : ID'
pass
def p_enumerator_2(t):
'enumerator : ID EQUALS constant_expression'
pass
# declarator:
def p_declarator_1(t):
'declarator : pointer direct_declarator'
pass
def p_declarator_2(t):
'declarator : direct_declarator'
pass
# direct-declarator:
def p_direct_declarator_1(t):
'direct_declarator : ID'
pass
def p_direct_declarator_2(t):
'direct_declarator : LPAREN declarator RPAREN'
pass
def p_direct_declarator_3(t):
'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_declarator_4(t):
'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN '
pass
def p_direct_declarator_5(t):
'direct_declarator : direct_declarator LPAREN identifier_list RPAREN '
pass
def p_direct_declarator_6(t):
'direct_declarator : direct_declarator LPAREN RPAREN '
pass
# pointer:
def p_pointer_1(t):
'pointer : TIMES type_qualifier_list'
pass
def p_pointer_2(t):
'pointer : TIMES'
pass
def p_pointer_3(t):
'pointer : TIMES type_qualifier_list pointer'
pass
def p_pointer_4(t):
'pointer : TIMES pointer'
pass
# type-qualifier-list:
def p_type_qualifier_list_1(t):
'type_qualifier_list : type_qualifier'
pass
def p_type_qualifier_list_2(t):
'type_qualifier_list : type_qualifier_list type_qualifier'
pass
# parameter-type-list:
def p_parameter_type_list_1(t):
'parameter_type_list : parameter_list'
pass
def p_parameter_type_list_2(t):
'parameter_type_list : parameter_list COMMA ELLIPSIS'
pass
# parameter-list:
def p_parameter_list_1(t):
'parameter_list : parameter_declaration'
pass
def p_parameter_list_2(t):
'parameter_list : parameter_list COMMA parameter_declaration'
pass
# parameter-declaration:
def p_parameter_declaration_1(t):
'parameter_declaration : declaration_specifiers declarator'
pass
def p_parameter_declaration_2(t):
'parameter_declaration : declaration_specifiers abstract_declarator_opt'
pass
# identifier-list:
def p_identifier_list_1(t):
'identifier_list : ID'
pass
def p_identifier_list_2(t):
'identifier_list : identifier_list COMMA ID'
pass
# initializer:
def p_initializer_1(t):
'initializer : assignment_expression'
pass
def p_initializer_2(t):
'''initializer : LBRACE initializer_list RBRACE
| LBRACE initializer_list COMMA RBRACE'''
pass
# initializer-list:
def p_initializer_list_1(t):
'initializer_list : initializer'
pass
def p_initializer_list_2(t):
'initializer_list : initializer_list COMMA initializer'
pass
# type-name:
def p_type_name(t):
'type_name : specifier_qualifier_list abstract_declarator_opt'
pass
def p_abstract_declarator_opt_1(t):
'abstract_declarator_opt : empty'
pass
def p_abstract_declarator_opt_2(t):
'abstract_declarator_opt : abstract_declarator'
pass
# abstract-declarator:
def p_abstract_declarator_1(t):
'abstract_declarator : pointer '
pass
def p_abstract_declarator_2(t):
'abstract_declarator : pointer direct_abstract_declarator'
pass
def p_abstract_declarator_3(t):
'abstract_declarator : direct_abstract_declarator'
pass
# direct-abstract-declarator:
def p_direct_abstract_declarator_1(t):
'direct_abstract_declarator : LPAREN abstract_declarator RPAREN'
pass
def p_direct_abstract_declarator_2(t):
'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_abstract_declarator_3(t):
'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET'
pass
def p_direct_abstract_declarator_4(t):
'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN'
pass
def p_direct_abstract_declarator_5(t):
'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN'
pass
# Optional fields in abstract declarators
def p_constant_expression_opt_1(t):
'constant_expression_opt : empty'
pass
def p_constant_expression_opt_2(t):
'constant_expression_opt : constant_expression'
pass
def p_parameter_type_list_opt_1(t):
'parameter_type_list_opt : empty'
pass
def p_parameter_type_list_opt_2(t):
'parameter_type_list_opt : parameter_type_list'
pass
# statement:
def p_statement(t):
'''
statement : labeled_statement
| expression_statement
| compound_statement
| selection_statement
| iteration_statement
| jump_statement
'''
pass
# labeled-statement:
def p_labeled_statement_1(t):
'labeled_statement : ID COLON statement'
pass
def p_labeled_statement_2(t):
'labeled_statement : CASE constant_expression COLON statement'
pass
def p_labeled_statement_3(t):
'labeled_statement : DEFAULT COLON statement'
pass
# expression-statement:
def p_expression_statement(t):
'expression_statement : expression_opt SEMI'
pass
# compound-statement:
def p_compound_statement_1(t):
'compound_statement : LBRACE declaration_list statement_list RBRACE'
pass
def p_compound_statement_2(t):
'compound_statement : LBRACE statement_list RBRACE'
pass
def p_compound_statement_3(t):
'compound_statement : LBRACE declaration_list RBRACE'
pass
def p_compound_statement_4(t):
'compound_statement : LBRACE RBRACE'
pass
# statement-list:
def p_statement_list_1(t):
'statement_list : statement'
pass
def p_statement_list_2(t):
'statement_list : statement_list statement'
pass
# selection-statement
def p_selection_statement_1(t):
'selection_statement : IF LPAREN expression RPAREN statement'
pass
def p_selection_statement_2(t):
'selection_statement : IF LPAREN expression RPAREN statement ELSE statement '
pass
def p_selection_statement_3(t):
'selection_statement : SWITCH LPAREN expression RPAREN statement '
pass
# iteration_statement:
def p_iteration_statement_1(t):
'iteration_statement : WHILE LPAREN expression RPAREN statement'
pass
def p_iteration_statement_2(t):
'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement '
pass
def p_iteration_statement_3(t):
'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI'
pass
# jump_statement:
def p_jump_statement_1(t):
'jump_statement : GOTO ID SEMI'
pass
def p_jump_statement_2(t):
'jump_statement : CONTINUE SEMI'
pass
def p_jump_statement_3(t):
'jump_statement : BREAK SEMI'
pass
def p_jump_statement_4(t):
'jump_statement : RETURN expression_opt SEMI'
pass
def p_expression_opt_1(t):
'expression_opt : empty'
pass
def p_expression_opt_2(t):
'expression_opt : expression'
pass
# expression:
def p_expression_1(t):
'expression : assignment_expression'
pass
def p_expression_2(t):
'expression : expression COMMA assignment_expression'
pass
# assignment-expression:
def p_assignment_expression_1(t):
'assignment_expression : conditional_expression'
pass
def p_assignment_expression_2(t):
'assignment_expression : unary_expression assignment_operator assignment_expression'
pass
# assignment_operator:
def p_assignment_operator(t):
'''
assignment_operator : EQUALS
| TIMESEQUAL
| DIVEQUAL
| MODEQUAL
| PLUSEQUAL
| MINUSEQUAL
| LSHIFTEQUAL
| RSHIFTEQUAL
| ANDEQUAL
| OREQUAL
| XOREQUAL
'''
pass
# conditional-expression
def p_conditional_expression_1(t):
'conditional_expression : logical_or_expression'
pass
def p_conditional_expression_2(t):
'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression '
pass
# constant-expression
def p_constant_expression(t):
'constant_expression : conditional_expression'
pass
# logical-or-expression
def p_logical_or_expression_1(t):
'logical_or_expression : logical_and_expression'
pass
def p_logical_or_expression_2(t):
'logical_or_expression : logical_or_expression LOR logical_and_expression'
pass
# logical-and-expression
def p_logical_and_expression_1(t):
'logical_and_expression : inclusive_or_expression'
pass
def p_logical_and_expression_2(t):
'logical_and_expression : logical_and_expression LAND inclusive_or_expression'
pass
# inclusive-or-expression:
def p_inclusive_or_expression_1(t):
'inclusive_or_expression : exclusive_or_expression'
pass
def p_inclusive_or_expression_2(t):
'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression'
pass
# exclusive-or-expression:
def p_exclusive_or_expression_1(t):
'exclusive_or_expression : and_expression'
pass
def p_exclusive_or_expression_2(t):
'exclusive_or_expression : exclusive_or_expression XOR and_expression'
pass
# AND-expression
def p_and_expression_1(t):
'and_expression : equality_expression'
pass
def p_and_expression_2(t):
'and_expression : and_expression AND equality_expression'
pass
# equality-expression:
def p_equality_expression_1(t):
'equality_expression : relational_expression'
pass
def p_equality_expression_2(t):
'equality_expression : equality_expression EQ relational_expression'
pass
def p_equality_expression_3(t):
'equality_expression : equality_expression NE relational_expression'
pass
# relational-expression:
def p_relational_expression_1(t):
'relational_expression : shift_expression'
pass
def p_relational_expression_2(t):
'relational_expression : relational_expression LT shift_expression'
pass
def p_relational_expression_3(t):
'relational_expression : relational_expression GT shift_expression'
pass
def p_relational_expression_4(t):
'relational_expression : relational_expression LE shift_expression'
pass
def p_relational_expression_5(t):
'relational_expression : relational_expression GE shift_expression'
pass
# shift-expression
def p_shift_expression_1(t):
'shift_expression : additive_expression'
pass
def p_shift_expression_2(t):
'shift_expression : shift_expression LSHIFT additive_expression'
pass
def p_shift_expression_3(t):
'shift_expression : shift_expression RSHIFT additive_expression'
pass
# additive-expression
def p_additive_expression_1(t):
'additive_expression : multiplicative_expression'
pass
def p_additive_expression_2(t):
'additive_expression : additive_expression PLUS multiplicative_expression'
pass
def p_additive_expression_3(t):
'additive_expression : additive_expression MINUS multiplicative_expression'
pass
# multiplicative-expression
def p_multiplicative_expression_1(t):
'multiplicative_expression : cast_expression'
pass
def p_multiplicative_expression_2(t):
'multiplicative_expression : multiplicative_expression TIMES cast_expression'
pass
def p_multiplicative_expression_3(t):
'multiplicative_expression : multiplicative_expression DIVIDE cast_expression'
pass
def p_multiplicative_expression_4(t):
'multiplicative_expression : multiplicative_expression MOD cast_expression'
pass
# cast-expression:
def p_cast_expression_1(t):
'cast_expression : unary_expression'
pass
def p_cast_expression_2(t):
'cast_expression : LPAREN type_name RPAREN cast_expression'
pass
# unary-expression:
def p_unary_expression_1(t):
'unary_expression : postfix_expression'
pass
def p_unary_expression_2(t):
'unary_expression : PLUSPLUS unary_expression'
pass
def p_unary_expression_3(t):
'unary_expression : MINUSMINUS unary_expression'
pass
def p_unary_expression_4(t):
'unary_expression : unary_operator cast_expression'
pass
def p_unary_expression_5(t):
'unary_expression : SIZEOF unary_expression'
pass
def p_unary_expression_6(t):
'unary_expression : SIZEOF LPAREN type_name RPAREN'
pass
# unary-operator:
def p_unary_operator(t):
'''unary_operator : AND
| TIMES
| PLUS
| MINUS
| NOT
| LNOT '''
pass
# postfix-expression:
def p_postfix_expression_1(t):
'postfix_expression : primary_expression'
pass
def p_postfix_expression_2(t):
'postfix_expression : postfix_expression LBRACKET expression RBRACKET'
pass
def p_postfix_expression_3(t):
'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN'
pass
def p_postfix_expression_4(t):
'postfix_expression : postfix_expression LPAREN RPAREN'
pass
def p_postfix_expression_5(t):
'postfix_expression : postfix_expression PERIOD ID'
pass
def p_postfix_expression_6(t):
'postfix_expression : postfix_expression ARROW ID'
pass
def p_postfix_expression_7(t):
'postfix_expression : postfix_expression PLUSPLUS'
pass
def p_postfix_expression_8(t):
'postfix_expression : postfix_expression MINUSMINUS'
pass
# primary-expression:
def p_primary_expression(t):
'''primary_expression : ID
| constant
| SCONST
| LPAREN expression RPAREN'''
pass
# argument-expression-list:
def p_argument_expression_list(t):
'''argument_expression_list : assignment_expression
| argument_expression_list COMMA assignment_expression'''
pass
# constant:
def p_constant(t):
'''constant : ICONST
| FCONST
| CCONST'''
pass
def p_empty(t):
'empty : '
pass
def p_error(t):
print("Whoa. We're hosed")
import profile
# Build the grammar
yacc.yacc(method='LALR')
#profile.run("yacc.yacc(method='LALR')")
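
Every production above carries an empty action (pass), so this grammar only
checks syntax; it builds no AST and computes no values. In PLY the docstring
is the production, and t[0], t[1], ... carry the semantic values of the
left-hand side and the right-hand-side symbols. A minimal sketch, not part of
the vendored file, of what one rule would look like with a real action:

def p_enumerator_2(t):
    'enumerator : ID EQUALS constant_expression'
    # t[1] is the identifier text, t[3] the constant expression's value
    t[0] = (t[1], t[3])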

ply/example/calc/calc.py Normal file
@ -0,0 +1,107 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s)
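
A note on the precedence table used by this and the following calculator
examples: the tuples are listed from lowest to highest precedence, and UMINUS
is a fictitious token that exists only so '%prec UMINUS' can give unary minus
a higher precedence than binary subtraction. A hypothetical session
(illustrative output, assuming the script runs as written):

calc > x = 8
calc > x * (2 + 3)
40
calc > -x + 3
-5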

@ -0,0 +1,113 @@
# -----------------------------------------------------------------------------
# calc.py
#
# This example shows how to run the parser in a debugging mode
# with output routed to a logging object.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
import logging
logging.basicConfig(
level=logging.INFO,
filename="parselog.txt"
)
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s,debug=logging.getLogger())

ply/example/classcalc/calc.py Executable file
@ -0,0 +1,157 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser:
"""
Base class for a lexer/parser that has the rules defined as methods
"""
tokens = ()
precedence = ()
def __init__(self, **kw):
self.debug = kw.get('debug', 0)
self.names = { }
try:
modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
except:
modname = "parser"+"_"+self.__class__.__name__
self.debugfile = modname + ".dbg"
self.tabmodule = modname + "_" + "parsetab"
#print self.debugfile, self.tabmodule
# Build the lexer and parser
lex.lex(module=self, debug=self.debug)
yacc.yacc(module=self,
debug=self.debug,
debugfile=self.debugfile,
tabmodule=self.tabmodule)
def run(self):
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
#print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('left', 'EXP'),
('right','UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
#print [repr(p[i]) for i in range(0,4)]
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
elif p[2] == '**': p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

@ -0,0 +1,40 @@
# calc_Calc_parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = '|\x0f"\xe2\x0e\xf7\x0fT\x15K\x1c\xc0\x1e\xa3c\x10'
_lr_action_items = {'$end':([1,2,3,5,9,15,16,17,18,19,20,21,22,],[-11,-10,0,-2,-11,-8,-1,-9,-6,-5,-3,-7,-4,]),'RPAREN':([2,8,9,15,17,18,19,20,21,22,],[-10,17,-11,-8,-9,-6,-5,-3,-7,-4,]),'DIVIDE':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,10,10,-11,-8,10,-9,-6,-5,10,-7,10,]),'EQUALS':([1,],[7,]),'NUMBER':([0,4,6,7,10,11,12,13,14,],[2,2,2,2,2,2,2,2,2,]),'PLUS':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,12,12,-11,-8,12,-9,-6,-5,-3,-7,-4,]),'LPAREN':([0,4,6,7,10,11,12,13,14,],[4,4,4,4,4,4,4,4,4,]),'EXP':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,13,13,-11,-8,13,-9,13,13,13,-7,13,]),'TIMES':([1,2,5,8,9,15,16,17,18,19,20,21,22,],[-11,-10,11,11,-11,-8,11,-9,-6,-5,11,-7,11,]),'MINUS':([0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,],[6,-11,-10,6,14,6,6,14,-11,6,6,6,6,6,-8,14,-9,-6,-5,-3,-7,-4,]),'NAME':([0,4,6,7,10,11,12,13,14,],[1,9,9,9,9,9,9,9,9,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'expression':([0,4,6,7,10,11,12,13,14,],[5,8,15,16,18,19,20,21,22,]),'statement':([0,],[3,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> statement","S'",1,None,None,None),
('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','./calc.py',107),
('statement -> expression','statement',1,'p_statement_expr','./calc.py',111),
('expression -> expression PLUS expression','expression',3,'p_expression_binop','./calc.py',116),
('expression -> expression MINUS expression','expression',3,'p_expression_binop','./calc.py',117),
('expression -> expression TIMES expression','expression',3,'p_expression_binop','./calc.py',118),
('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','./calc.py',119),
('expression -> expression EXP expression','expression',3,'p_expression_binop','./calc.py',120),
('expression -> MINUS expression','expression',2,'p_expression_uminus','./calc.py',130),
('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','./calc.py',134),
('expression -> NUMBER','expression',1,'p_expression_number','./calc.py',138),
('expression -> NAME','expression',1,'p_expression_name','./calc.py',142),
]
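
A brief note on the generated tables above (my reading of the format, not
documentation from the file itself): _lr_action_items maps each terminal to a
pair of parallel lists (states, actions), which the loop expands into
_lr_action[state][token]. In PLY's driver a positive entry means "shift to
that state", a negative entry means "reduce by production -entry", and zero
means accept. A minimal lookup sketch, assuming the tables above have been
imported:

def next_action(state, token_type):
    # Returns the parser move for `token_type` in `state`.
    act = _lr_action[state].get(token_type)
    if act is None:
        raise SyntaxError("unexpected %s in state %d" % (token_type, state))
    if act > 0:
        return ('shift', act)                     # push state `act`
    if act < 0:
        return ('reduce', _lr_productions[-act])  # reduce by production -act
    return ('accept', None)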

ply/example/cleanup.sh Executable file
@ -0,0 +1,2 @@
#!/bin/sh
rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class

@ -0,0 +1,130 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A calculator parser that makes use of closures. The function make_calculator()
# returns a function that accepts an input string and returns a result. All
# lexing rules, parsing rules, and internal state are held inside the function.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
# Make a calculator function
def make_calculator():
import ply.lex as lex
import ply.yacc as yacc
# ------- Internal calculator state
variables = { } # Dictionary of stored variables
# ------- Calculator tokenizing rules
tokens = (
'NAME','NUMBER',
)
literals = ['=','+','-','*','/', '(',')']
t_ignore = " \t"
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lexer = lex.lex()
# ------- Calculator parsing rules
precedence = (
('left','+','-'),
('left','*','/'),
('right','UMINUS'),
)
def p_statement_assign(p):
'statement : NAME "=" expression'
variables[p[1]] = p[3]
p[0] = None
def p_statement_expr(p):
'statement : expression'
p[0] = p[1]
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = variables[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
# Build the parser
parser = yacc.yacc()
# ------- Input function
def input(text):
result = parser.parse(text,lexer=lexer)
return result
return input
# Make a calculator object and use it
calc = make_calculator()
while True:
try:
s = raw_input("calc > ")
except EOFError:
break
r = calc(s)
if r:
print(r)
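
The design point of this closure-based version: every call to
make_calculator() builds its own lexer, parser, and variables dictionary, so
separate calculators do not share state. A hypothetical use of two instances
(illustrative only):

calc_a = make_calculator()
calc_b = make_calculator()
calc_a("x = 1")
calc_b("x = 99")
print(calc_a("x"))   # -> 1
print(calc_b("x"))   # -> 99; calc_b's 'x' never touches calc_a's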

@ -0,0 +1,48 @@
# -----------------------------------------------------------------------------
# hedit.py
#
# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson)
#
# These tokens can't be recognized by a fixed regular expression alone
# because they have the following form:
#
# nHc1...cn
#
# where n is a positive integer and c1 ... cn are characters.
#
# This example shows how to modify the state of the lexer to parse
# such tokens
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
tokens = (
'H_EDIT_DESCRIPTOR',
)
# Tokens
t_ignore = " \t\n"
def t_H_EDIT_DESCRIPTOR(t):
r"\d+H.*" # This grabs all of the remaining text
i = t.value.index('H')
n = int(t.value[:i])    # the digit count before the 'H'
# Adjust the tokenizing position
t.lexer.lexpos -= len(t.value) - (i+1+n)
t.value = t.value[i+1:i+1+n]
return t
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
lex.runmain()
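
To make the lexpos arithmetic above concrete: in an H edit descriptor the
count before the 'H' says how many characters belong to the token, so the
rule trims t.value to exactly that many characters and rewinds the lexer to
resume right after them. A hypothetical snippet (illustrative only):

lex.input("5Hhello 3Hfoo")
for tok in iter(lex.token, None):
    print(tok.value)    # prints 'hello', then 'foo'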

ply/example/newclasscalc/calc.py Executable file
@ -0,0 +1,160 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab.
#
# Modified to use new-style classes. Test case.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser(object):
"""
Base class for a lexer/parser that has the rules defined as methods
"""
tokens = ()
precedence = ()
def __init__(self, **kw):
self.debug = kw.get('debug', 0)
self.names = { }
try:
modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
except:
modname = "parser"+"_"+self.__class__.__name__
self.debugfile = modname + ".dbg"
self.tabmodule = modname + "_" + "parsetab"
#print self.debugfile, self.tabmodule
# Build the lexer and parser
lex.lex(module=self, debug=self.debug)
yacc.yacc(module=self,
debug=self.debug,
debugfile=self.debugfile,
tabmodule=self.tabmodule)
def run(self):
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
#print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('left', 'EXP'),
('right','UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
#print [repr(p[i]) for i in range(0,4)]
if p[2] == '+' : p[0] = p[1] + p[3]
elif p[2] == '-': p[0] = p[1] - p[3]
elif p[2] == '*': p[0] = p[1] * p[3]
elif p[2] == '/': p[0] = p[1] / p[3]
elif p[2] == '**': p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

@ -0,0 +1,9 @@
An example showing how to use Python optimized mode.
To run:
- First run 'python calc.py'
- Then run 'python -OO calc.py'
If working correctly, the second version should run the
same way.
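
Background on why the two-step procedure matters (my understanding, not
stated in the README): python -OO strips docstrings, and PLY reads token
regexes and grammar rules from docstrings. With optimize=1, the first normal
run writes cached lextab/parsetab modules, and later -OO runs load those
tables instead of re-reading docstrings. The pattern under test in calc.py
below:

import ply.lex as lex
import ply.yacc as yacc
lex.lex(optimize=1)    # writes, then reuses, lextab.py
yacc.yacc(optimize=1)  # writes, then reuses, parsetab.py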

ply/example/optcalc/calc.py Normal file
@ -0,0 +1,119 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex(optimize=1)
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(t):
'statement : NAME EQUALS expression'
names[t[1]] = t[3]
def p_statement_expr(t):
'statement : expression'
print(t[1])
def p_expression_binop(t):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if t[2] == '+' : t[0] = t[1] + t[3]
elif t[2] == '-': t[0] = t[1] - t[3]
elif t[2] == '*': t[0] = t[1] * t[3]
elif t[2] == '/': t[0] = t[1] / t[3]
elif t[2] == '<': t[0] = t[1] < t[3]
def p_expression_uminus(t):
'expression : MINUS expression %prec UMINUS'
t[0] = -t[2]
def p_expression_group(t):
'expression : LPAREN expression RPAREN'
t[0] = t[2]
def p_expression_number(t):
'expression : NUMBER'
t[0] = t[1]
def p_expression_name(t):
'expression : NAME'
try:
t[0] = names[t[1]]
except LookupError:
print("Undefined name '%s'" % t[1])
t[0] = 0
def p_error(t):
if t:
print("Syntax error at '%s'" % t.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc(optimize=1)
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
yacc.parse(s)

ply/example/unicalc/calc.py Normal file
@ -0,0 +1,117 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# This example uses unicode strings for tokens, docstrings, and input.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0,"../..")
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = ur'\+'
t_MINUS = ur'-'
t_TIMES = ur'\*'
t_DIVIDE = ur'/'
t_EQUALS = ur'='
t_LPAREN = ur'\('
t_RPAREN = ur'\)'
t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
ur'\d+'
try:
t.value = int(t.value)
except ValueError:
print "Integer value too large", t.value
t.value = 0
return t
t_ignore = u" \t"
def t_newline(t):
ur'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print "Illegal character '%s'" % t.value[0]
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left','PLUS','MINUS'),
('left','TIMES','DIVIDE'),
('right','UMINUS'),
)
# dictionary of names
names = { }
def p_statement_assign(p):
'statement : NAME EQUALS expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print p[1]
def p_expression_binop(p):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if p[2] == u'+' : p[0] = p[1] + p[3]
elif p[2] == u'-': p[0] = p[1] - p[3]
elif p[2] == u'*': p[0] = p[1] * p[3]
elif p[2] == u'/': p[0] = p[1] / p[3]
def p_expression_uminus(p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(p):
'expression : NAME'
try:
p[0] = names[p[1]]
except LookupError:
print "Undefined name '%s'" % p[1]
p[0] = 0
def p_error(p):
if p:
print "Syntax error at '%s'" % p.value
else:
print "Syntax error at EOF"
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s: continue
yacc.parse(unicode(s))

ply/example/yply/README Normal file
@ -0,0 +1,41 @@
yply.py
This example implements a program yply.py that converts a UNIX-yacc
specification file into a PLY-compatible program. To use, simply
run it like this:
% python yply.py [-nocode] inputfile.y >myparser.py
The output of this program is Python code. In the output,
any C code in the original file is included, but is commented out.
If you use the -nocode option, then all of the C code in the
original file is just discarded.
To use the resulting grammar with PLY, you'll need to edit the
myparser.py file. Within this file, some stub code is included that
can be used to test the construction of the parsing tables. However,
you'll need to do more editing to make a workable parser.
Disclaimer: This is just an example I threw together in an afternoon.
It might have some bugs. However, it worked when I tried it on
a yacc-specified C++ parser containing 442 rules and 855 parsing
states.
Comments:
1. This example does not parse specification files meant for lex/flex.
You'll need to specify the tokenizer on your own.
2. This example shows a number of interesting PLY features including
- Parsing of literal text delimited by nested parentheses
- Some interaction between the parser and the lexer.
- Use of literals in the grammar specification
- One-pass compilation.  The program just emits the result;
there is no intermediate parse tree.
3. This program could probably be cleaned up and enhanced a lot.
It would be great if someone wanted to work on this (hint).
-Dave
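
To make the conversion concrete: given a yacc rule such as
expr : expr '+' expr { $$ = $1 + $3; }, the generator (yparse.py, further
below) emits a PLY stub of roughly this shape, with the original C action
preserved as a comment -- a sketch of the output format, not verbatim tool
output:

def p_expr_1(p):
    '''expr : expr '+' expr'''
    # { $$ = $1 + $3; }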

ply/example/yply/ylex.py Normal file
@ -0,0 +1,112 @@
# lexer for yacc-grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import sys
sys.path.append("../..")
from ply import *
tokens = (
'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE',
'ID','QLITERAL','NUMBER',
)
states = (('code','exclusive'),)
literals = [ ';', ',', '<', '>', '|',':' ]
t_ignore = ' \t'
t_TOKEN = r'%token'
t_LEFT = r'%left'
t_RIGHT = r'%right'
t_NONASSOC = r'%nonassoc'
t_PREC = r'%prec'
t_START = r'%start'
t_TYPE = r'%type'
t_UNION = r'%union'
t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
t_NUMBER = r'\d+'
def t_SECTION(t):
r'%%'
if getattr(t.lexer,"lastsection",0):
t.value = t.lexer.lexdata[t.lexpos+2:]
t.lexer.lexpos = len(t.lexer.lexdata)
else:
t.lexer.lastsection = 0
return t
# Comments
def t_ccomment(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
t_ignore_cppcomment = r'//.*'
def t_LITERAL(t):
r'%\{(.|\n)*?%\}'
t.lexer.lineno += t.value.count("\n")
return t
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
def t_code(t):
r'\{'
t.lexer.codestart = t.lexpos
t.lexer.level = 1
t.lexer.begin('code')
def t_code_ignore_string(t):
r'\"([^\\\n]|(\\.))*?\"'
def t_code_ignore_char(t):
r'\'([^\\\n]|(\\.))*?\''
def t_code_ignore_comment(t):
r'/\*(.|\n)*?\*/'
def t_code_ignore_cppcom(t):
r'//.*'
def t_code_lbrace(t):
r'\{'
t.lexer.level += 1
def t_code_rbrace(t):
r'\}'
t.lexer.level -= 1
if t.lexer.level == 0:
t.type = 'CODE'
t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1]
t.lexer.begin('INITIAL')
t.lexer.lineno += t.value.count('\n')
return t
t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
t_code_ignore_whitespace = r'\s+'
t_code_ignore = ""
def t_code_error(t):
raise RuntimeError
def t_error(t):
print "%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0])
print t.value
t.lexer.skip(1)
lex.lex()
if __name__ == '__main__':
lex.runmain()

ply/example/yply/yparse.py Normal file
@ -0,0 +1,217 @@
# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import ylex
tokens = ylex.tokens
from ply import *
tokenlist = []
preclist = []
emit_code = 1
def p_yacc(p):
'''yacc : defsection rulesection'''
def p_defsection(p):
'''defsection : definitions SECTION
| SECTION'''
p.lexer.lastsection = 1
print "tokens = ", repr(tokenlist)
print
print "precedence = ", repr(preclist)
print
print "# -------------- RULES ----------------"
print
def p_rulesection(p):
'''rulesection : rules SECTION'''
print "# -------------- RULES END ----------------"
print_code(p[2],0)
def p_definitions(p):
'''definitions : definitions definition
| definition'''
def p_definition_literal(p):
'''definition : LITERAL'''
print_code(p[1],0)
def p_definition_start(p):
'''definition : START ID'''
print "start = '%s'" % p[2]
def p_definition_token(p):
'''definition : toktype opttype idlist optsemi '''
for i in p[3]:
if i[0] not in "'\"":
tokenlist.append(i)
if p[1] == '%left':
preclist.append(('left',) + tuple(p[3]))
elif p[1] == '%right':
preclist.append(('right',) + tuple(p[3]))
elif p[1] == '%nonassoc':
preclist.append(('nonassoc',)+ tuple(p[3]))
def p_toktype(p):
'''toktype : TOKEN
| LEFT
| RIGHT
| NONASSOC'''
p[0] = p[1]
def p_opttype(p):
'''opttype : '<' ID '>'
| empty'''
def p_idlist(p):
'''idlist : idlist optcomma tokenid
| tokenid'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[3])
def p_tokenid(p):
'''tokenid : ID
| ID NUMBER
| QLITERAL
| QLITERAL NUMBER'''
p[0] = p[1]
def p_optsemi(p):
'''optsemi : ';'
| empty'''
def p_optcomma(p):
'''optcomma : ','
| empty'''
def p_definition_type(p):
'''definition : TYPE '<' ID '>' namelist optsemi'''
# type declarations are ignored
def p_namelist(p):
'''namelist : namelist optcomma ID
| ID'''
def p_definition_union(p):
'''definition : UNION CODE optsemi'''
# Union declarations are ignored
def p_rules(p):
'''rules : rules rule
| rule'''
if len(p) == 2:
rule = p[1]
else:
rule = p[2]
# Print out a Python equivalent of this rule
embedded = [ ] # Embedded actions (a mess)
embed_count = 0
rulename = rule[0]
rulecount = 1
for r in rule[1]:
# r contains one of the rule possibilities
print "def p_%s_%d(p):" % (rulename,rulecount)
prod = []
prodcode = ""
for i in range(len(r)):
item = r[i]
if item[0] == '{': # A code block
if i == len(r) - 1:
prodcode = item
break
else:
# an embedded action
embed_name = "_embed%d_%s" % (embed_count,rulename)
prod.append(embed_name)
embedded.append((embed_name,item))
embed_count += 1
else:
prod.append(item)
print " '''%s : %s'''" % (rulename, " ".join(prod))
# Emit code
print_code(prodcode,4)
print
rulecount += 1
for e,code in embedded:
print "def p_%s(p):" % e
print " '''%s : '''" % e
print_code(code,4)
print
def p_rule(p):
'''rule : ID ':' rulelist ';' '''
p[0] = (p[1],[p[3]])
def p_rule2(p):
'''rule : ID ':' rulelist morerules ';' '''
p[4].insert(0,p[3])
p[0] = (p[1],p[4])
def p_rule_empty(p):
'''rule : ID ':' ';' '''
p[0] = (p[1],[[]])
def p_rule_empty2(p):
'''rule : ID ':' morerules ';' '''
p[3].insert(0,[])
p[0] = (p[1],p[3])
def p_morerules(p):
'''morerules : morerules '|' rulelist
| '|' rulelist
| '|' '''
if len(p) == 2:
p[0] = [[]]
elif len(p) == 3:
p[0] = [p[2]]
else:
p[0] = p[1]
p[0].append(p[3])
# print "morerules", len(p), p[0]
def p_rulelist(p):
'''rulelist : rulelist ruleitem
| ruleitem'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[2])
def p_ruleitem(p):
'''ruleitem : ID
| QLITERAL
| CODE
| PREC'''
p[0] = p[1]
def p_empty(p):
'''empty : '''
def p_error(p):
pass
yacc.yacc(debug=0)
def print_code(code,indent):
if not emit_code: return
codelines = code.splitlines()
for c in codelines:
print "%s# %s" % (" "*indent,c)

ply/example/yply/yply.py Executable file
@ -0,0 +1,53 @@
#!/usr/local/bin/python
# yply.py
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
#
# Converts a UNIX-yacc specification file into a PLY-compatible
# specification. To use, simply do this:
#
# % python yply.py [-nocode] inputfile.y >myparser.py
#
# The output of this program is Python code. In the output,
# any C code in the original file is included, but is commented.
# If you use the -nocode option, then all of the C code in the
# original file is discarded.
#
# Disclaimer: This is just an example I threw together in an afternoon.
# It might have some bugs. However, it worked when I tried it on
# a yacc-specified C++ parser containing 442 rules and 855 parsing
# states.
#
import sys
sys.path.insert(0,"../..")
import ylex
import yparse
from ply import *
if len(sys.argv) == 1:
print "usage : yply.py [-nocode] inputfile"
raise SystemExit
if len(sys.argv) == 3:
if sys.argv[1] == '-nocode':
yparse.emit_code = 0
else:
print "Unknown option '%s'" % sys.argv[1]
raise SystemExit
filename = sys.argv[2]
else:
filename = sys.argv[1]
yacc.parse(open(filename).read())
print """
if __name__ == '__main__':
from ply import *
yacc.yacc()
"""

ply/ply/__init__.py Normal file
@ -0,0 +1,4 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)
__all__ = ['lex','yacc']

ply/ply/cpp.py Normal file
@ -0,0 +1,898 @@
# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators
# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions. These tokens are enough to get
# a basic preprocessor working. Other modules may import these if they want
# -----------------------------------------------------------------------------
tokens = (
'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
r'\s+'
t.lexer.lineno += t.value.count("\n")
return t
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
# Integer literal
def CPP_INTEGER(t):
r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
return t
t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
def t_CPP_STRING(t):
r'\"([^\\\n]|(\\(.|\n)))*?\"'
t.lexer.lineno += t.value.count("\n")
return t
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
t.lexer.lineno += t.value.count("\n")
return t
# Comment
def t_CPP_COMMENT(t):
r'(/\*(.|\n)*?\*/)|(//.*?\n)'
t.lexer.lineno += t.value.count("\n")
return t
def t_error(t):
t.type = t.value[0]
t.value = t.value[0]
t.lexer.skip(1)
return t
import re
import copy
import time
import os.path
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
# ??= #
# ??/ \
# ??' ^
# ??( [
# ??) ]
# ??! |
# ??< {
# ??> }
# ??- ~
# -----------------------------------------------------------------------------
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
'=':'#',
'/':'\\',
"'":'^',
'(':'[',
')':']',
'!':'|',
'<':'{',
'>':'}',
'-':'~'
}
def trigraph(input):
return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
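# Illustrative examples of the mapping above (not from the original file):
#   trigraph('??=define FOO 1') -> '#define FOO 1'
#   trigraph('a??(3??) = 0;')   -> 'a[3] = 0;'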
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
# .name - Macro name (string)
# .value - Macro value (a list of tokens)
# .arglist - List of argument names
# .variadic - Boolean indicating whether or not variadic macro
# .vararg - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
def __init__(self,name,value,arglist=None,variadic=False):
self.name = name
self.value = value
self.arglist = arglist
self.variadic = variadic
if variadic:
self.vararg = arglist[-1]
self.source = None
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor. Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
def __init__(self,lexer=None):
if lexer is None:
lexer = lex.lexer
self.lexer = lexer
self.macros = { }
self.path = []
self.temp_path = []
# Probe the lexer for selected tokens
self.lexprobe()
tm = time.localtime()
self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
self.parser = None
# -----------------------------------------------------------------------------
# tokenize()
#
# Utility function. Given a string of text, tokenize into a list of tokens
# -----------------------------------------------------------------------------
def tokenize(self,text):
tokens = []
self.lexer.input(text)
while True:
tok = self.lexer.token()
if not tok: break
tokens.append(tok)
return tokens
# ---------------------------------------------------------------------
# error()
#
# Report a preprocessor error/warning of some kind
# ----------------------------------------------------------------------
def error(self,file,line,msg):
print("%s:%d %s" % (file,line,msg))
# ----------------------------------------------------------------------
# lexprobe()
#
# This method probes the preprocessor lexer object to discover
# the token types of symbols that are important to the preprocessor.
# If this works right, the preprocessor will simply "work"
# with any suitable lexer regardless of how tokens have been named.
# ----------------------------------------------------------------------
def lexprobe(self):
# Determine the token type for identifiers
self.lexer.input("identifier")
tok = self.lexer.token()
if not tok or tok.value != "identifier":
print("Couldn't determine identifier type")
else:
self.t_ID = tok.type
# Determine the token type for integers
self.lexer.input("12345")
tok = self.lexer.token()
if not tok or int(tok.value) != 12345:
print("Couldn't determine integer type")
else:
self.t_INTEGER = tok.type
self.t_INTEGER_TYPE = type(tok.value)
# Determine the token type for strings enclosed in double quotes
self.lexer.input("\"filename\"")
tok = self.lexer.token()
if not tok or tok.value != "\"filename\"":
print("Couldn't determine string type")
else:
self.t_STRING = tok.type
# Determine the token type for whitespace--if any
self.lexer.input(" ")
tok = self.lexer.token()
if not tok or tok.value != " ":
self.t_SPACE = None
else:
self.t_SPACE = tok.type
# Determine the token type for newlines
self.lexer.input("\n")
tok = self.lexer.token()
if not tok or tok.value != "\n":
self.t_NEWLINE = None
print("Couldn't determine token for newlines")
else:
self.t_NEWLINE = tok.type
self.t_WS = (self.t_SPACE, self.t_NEWLINE)
# Check for other characters used by the preprocessor
chars = [ '<','>','#','##','\\','(',')',',','.']
for c in chars:
self.lexer.input(c)
tok = self.lexer.token()
if not tok or tok.value != c:
print("Unable to lex '%s' required for preprocessor" % c)
# ----------------------------------------------------------------------
# add_path()
#
# Adds a search path to the preprocessor.
# ----------------------------------------------------------------------
def add_path(self,path):
self.path.append(path)
# ----------------------------------------------------------------------
# group_lines()
#
# Given an input string, this function splits it into lines. Trailing whitespace
# is removed. Any line ending with \ is grouped with the next line. This
# function forms the lowest level of the preprocessor---grouping text into
# a line-by-line format.
# ----------------------------------------------------------------------
def group_lines(self,input):
lex = self.lexer.clone()
lines = [x.rstrip() for x in input.splitlines()]
for i in xrange(len(lines)):
j = i+1
while lines[i].endswith('\\') and (j < len(lines)):
lines[i] = lines[i][:-1]+lines[j]
lines[j] = ""
j += 1
input = "\n".join(lines)
lex.input(input)
lex.lineno = 1
current_line = []
while True:
tok = lex.token()
if not tok:
break
current_line.append(tok)
if tok.type in self.t_WS and '\n' in tok.value:
yield current_line
current_line = []
if current_line:
yield current_line
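# Illustrative example (an assumption about typical input, not from the
# original file): the two physical lines '#define X 1 \' and '+ 2' are
# joined into the single logical line '#define X 1 + 2' before that line
# is handed out as a token list.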
# ----------------------------------------------------------------------
# tokenstrip()
#
# Remove leading/trailing whitespace tokens from a token list
# ----------------------------------------------------------------------
def tokenstrip(self,tokens):
i = 0
while i < len(tokens) and tokens[i].type in self.t_WS:
i += 1
del tokens[:i]
i = len(tokens)-1
while i >= 0 and tokens[i].type in self.t_WS:
i -= 1
del tokens[i+1:]
return tokens
# ----------------------------------------------------------------------
# collect_args()
#
# Collects comma separated arguments from a list of tokens. The arguments
# must be enclosed in parentheses. Returns a tuple (tokencount,args,positions)
# where tokencount is the number of tokens consumed, args is a list of arguments,
# and positions is a list of integers containing the starting index of each
# argument. Each argument is represented by a list of tokens.
#
# When collecting arguments, leading and trailing whitespace is removed
# from each argument.
#
# This function properly handles nested parentheses and commas---these do not
# define new arguments.
# ----------------------------------------------------------------------
def collect_args(self,tokenlist):
args = []
positions = []
current_arg = []
nesting = 1
tokenlen = len(tokenlist)
# Search for the opening '('.
i = 0
while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
i += 1
if (i < tokenlen) and (tokenlist[i].value == '('):
positions.append(i+1)
else:
self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
return 0, [], []
i += 1
while i < tokenlen:
t = tokenlist[i]
if t.value == '(':
current_arg.append(t)
nesting += 1
elif t.value == ')':
nesting -= 1
if nesting == 0:
if current_arg:
args.append(self.tokenstrip(current_arg))
positions.append(i)
return i+1,args,positions
current_arg.append(t)
elif t.value == ',' and nesting == 1:
args.append(self.tokenstrip(current_arg))
positions.append(i+1)
current_arg = []
else:
current_arg.append(t)
i += 1
# Missing end argument
self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
return 0, [],[]
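# Illustrative behavior (not from the original file): collecting arguments
# from the token list for "(x, f(a,b), y)" yields the three argument lists
# [x], [f(a,b)] and [y]; the comma inside f(a,b) sits at nesting level 2,
# so it does not split an argument.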
# ----------------------------------------------------------------------
# macro_prescan()
#
# Examine the macro value (token sequence) and identify patch points
# This is used to speed up macro expansion later on---we'll know
# right away where to apply patches to the value to form the expansion
# ----------------------------------------------------------------------
def macro_prescan(self,macro):
macro.patch = [] # Standard macro arguments
macro.str_patch = [] # String conversion expansion
macro.var_comma_patch = [] # Variadic macro comma patch
i = 0
while i < len(macro.value):
if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
argnum = macro.arglist.index(macro.value[i].value)
# Conversion of argument to a string
if i > 0 and macro.value[i-1].value == '#':
macro.value[i] = copy.copy(macro.value[i])
macro.value[i].type = self.t_STRING
del macro.value[i-1]
macro.str_patch.append((argnum,i-1))
continue
# Concatenation
elif (i > 0 and macro.value[i-1].value == '##'):
macro.patch.append(('c',argnum,i-1))
del macro.value[i-1]
continue
elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
macro.patch.append(('c',argnum,i))
i += 1
continue
# Standard expansion
else:
macro.patch.append(('e',argnum,i))
elif macro.value[i].value == '##':
if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
(macro.value[i+1].value == macro.vararg):
macro.var_comma_patch.append(i-1)
i += 1
macro.patch.sort(key=lambda x: x[2],reverse=True)
# ----------------------------------------------------------------------
# macro_expand_args()
#
# Given a Macro and list of arguments (each a token list), this method
# returns an expanded version of a macro. The return value is a token sequence
# representing the replacement macro tokens
# ----------------------------------------------------------------------
def macro_expand_args(self,macro,args):
# Make a copy of the macro token sequence
rep = [copy.copy(_x) for _x in macro.value]
# Make string expansion patches. These do not alter the length of the replacement sequence
str_expansion = {}
for argnum, i in macro.str_patch:
if argnum not in str_expansion:
str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
rep[i] = copy.copy(rep[i])
rep[i].value = str_expansion[argnum]
# Make the variadic macro comma patch. If the variadic macro argument is empty,
# we get rid of the comma that precedes it in the replacement sequence.
comma_patch = False
if macro.variadic and not args[-1]:
for i in macro.var_comma_patch:
rep[i] = None
comma_patch = True
# Make all other patches. The order of these matters. It is assumed that the patch list
# has been sorted in reverse order of patch location since replacements will cause the
# size of the replacement sequence to expand from the patch point.
expanded = { }
for ptype, argnum, i in macro.patch:
# Concatenation. Argument is left unexpanded
if ptype == 'c':
rep[i:i+1] = args[argnum]
# Normal expansion. Argument is macro expanded first
elif ptype == 'e':
if argnum not in expanded:
expanded[argnum] = self.expand_macros(args[argnum])
rep[i:i+1] = expanded[argnum]
# Get rid of removed comma if necessary
if comma_patch:
rep = [_i for _i in rep if _i]
return rep
# ----------------------------------------------------------------------
# expand_macros()
#
# Given a list of tokens, this function performs macro expansion.
# The expanded argument is a dictionary that contains macros already
# expanded. This is used to prevent infinite recursion.
# ----------------------------------------------------------------------
def expand_macros(self,tokens,expanded=None):
if expanded is None:
expanded = {}
i = 0
while i < len(tokens):
t = tokens[i]
if t.type == self.t_ID:
if t.value in self.macros and t.value not in expanded:
# Yes, we found a macro match
expanded[t.value] = True
m = self.macros[t.value]
if not m.arglist:
# A simple macro
ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
for e in ex:
e.lineno = t.lineno
tokens[i:i+1] = ex
i += len(ex)
else:
# A macro with arguments
j = i + 1
while j < len(tokens) and tokens[j].type in self.t_WS:
j += 1
if tokens[j].value == '(':
tokcount,args,positions = self.collect_args(tokens[j:])
if not m.variadic and len(args) != len(m.arglist):
self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
i = j + tokcount
elif m.variadic and len(args) < len(m.arglist)-1:
if len(m.arglist) > 2:
self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
else:
self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
i = j + tokcount
else:
if m.variadic:
if len(args) == len(m.arglist)-1:
args.append([])
else:
args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
del args[len(m.arglist):]
# Get macro replacement text
rep = self.macro_expand_args(m,args)
rep = self.expand_macros(rep,expanded)
for r in rep:
r.lineno = t.lineno
tokens[i:j+tokcount] = rep
i += len(rep)
del expanded[t.value]
continue
elif t.value == '__LINE__':
t.type = self.t_INTEGER
t.value = self.t_INTEGER_TYPE(t.lineno)
i += 1
return tokens
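# Minimal sketch of the recursion guard (editor's addition): given
#     #define A B
#     #define B A
# expanding the token A marks 'A' in `expanded` and substitutes B; when
# B's body mentions A again, 'A' is already in `expanded`, so the token
# is left alone and the expansion terminates with A instead of recursing
# forever.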
# ----------------------------------------------------------------------
# evalexpr()
#
# Evaluate an expression token sequence as an integral constant
# expression, as needed by #if and #elif.
# ----------------------------------------------------------------------
def evalexpr(self,tokens):
# tokens = tokenize(line)
# Search for defined macros
i = 0
while i < len(tokens):
if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
j = i + 1
needparen = False
result = "0L"
while j < len(tokens):
if tokens[j].type in self.t_WS:
j += 1
continue
elif tokens[j].type == self.t_ID:
if tokens[j].value in self.macros:
result = "1L"
else:
result = "0L"
if not needparen: break
elif tokens[j].value == '(':
needparen = True
elif tokens[j].value == ')':
break
else:
self.error(self.source,tokens[i].lineno,"Malformed defined()")
j += 1
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE(result)
del tokens[i+1:j+1]
i += 1
tokens = self.expand_macros(tokens)
for i,t in enumerate(tokens):
if t.type == self.t_ID:
tokens[i] = copy.copy(t)
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE("0L")
elif t.type == self.t_INTEGER:
tokens[i] = copy.copy(t)
# Strip off any trailing suffixes
tokens[i].value = str(tokens[i].value)
while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
tokens[i].value = tokens[i].value[:-1]
expr = "".join([str(x.value) for x in tokens])
expr = expr.replace("&&"," and ")
expr = expr.replace("||"," or ")
expr = expr.replace("!"," not ")
try:
result = eval(expr)
except StandardError:
self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
result = 0
return result
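# Worked example (editor's note): with FOO defined and BAR undefined,
#     #if defined(FOO) && !BAR
# is rewritten token by token into the Python expression "1L and not 0L",
# which eval() reduces to a true value, so the #if branch is taken.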
# ----------------------------------------------------------------------
# parsegen()
#
# Parse an input string.
# ----------------------------------------------------------------------
def parsegen(self,input,source=None):
# Replace trigraph sequences
t = trigraph(input)
lines = self.group_lines(t)
if not source:
source = ""
self.define("__FILE__ \"%s\"" % source)
self.source = source
chunk = []
enable = True
iftrigger = False
ifstack = []
for x in lines:
for i,tok in enumerate(x):
if tok.type not in self.t_WS: break
if tok.value == '#':
# Preprocessor directive
for tok in x:
if tok.type in self.t_WS and '\n' in tok.value:
chunk.append(tok)
dirtokens = self.tokenstrip(x[i+1:])
if dirtokens:
name = dirtokens[0].value
args = self.tokenstrip(dirtokens[1:])
else:
name = ""
args = []
if name == 'define':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.define(args)
elif name == 'include':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
oldfile = self.macros['__FILE__']
for tok in self.include(args):
yield tok
self.macros['__FILE__'] = oldfile
self.source = source
elif name == 'undef':
if enable:
for tok in self.expand_macros(chunk):
yield tok
chunk = []
self.undef(args)
elif name == 'ifdef':
ifstack.append((enable,iftrigger))
if enable:
if not args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'ifndef':
ifstack.append((enable,iftrigger))
if enable:
if args[0].value in self.macros:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'if':
ifstack.append((enable,iftrigger))
if enable:
result = self.evalexpr(args)
if not result:
enable = False
iftrigger = False
else:
iftrigger = True
elif name == 'elif':
if ifstack:
if ifstack[-1][0]: # We only pay attention if outer "if" allows this
if enable: # If already true, we flip enable False
enable = False
elif not iftrigger: # If False, but not triggered yet, we'll check expression
result = self.evalexpr(args)
if result:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
elif name == 'else':
if ifstack:
if ifstack[-1][0]:
if enable:
enable = False
elif not iftrigger:
enable = True
iftrigger = True
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
elif name == 'endif':
if ifstack:
enable,iftrigger = ifstack.pop()
else:
self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
else:
# Unknown preprocessor directive
pass
else:
# Normal text
if enable:
chunk.extend(x)
for tok in self.expand_macros(chunk):
yield tok
chunk = []
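# Editor's sketch of the conditional bookkeeping above: `enable` says
# whether text is currently emitted, `iftrigger` whether some branch of
# the current conditional chain has already fired, and `ifstack` saves
# both around nested conditionals.  For the sequence
#     #if 0 ... #elif 1 ... #else ... #endif
# (enable, iftrigger) is (False, False) after #if 0, (True, True) after
# #elif 1, (False, True) after #else, and #endif pops the saved state.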
# ----------------------------------------------------------------------
# include()
#
# Implementation of file-inclusion
# ----------------------------------------------------------------------
def include(self,tokens):
# Try to extract the filename and then process an include file
if not tokens:
return
if tokens:
if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
tokens = self.expand_macros(tokens)
if tokens[0].value == '<':
# Include <...>
i = 1
while i < len(tokens):
if tokens[i].value == '>':
break
i += 1
else:
print("Malformed #include <...>")
return
filename = "".join([x.value for x in tokens[1:i]])
path = self.path + [""] + self.temp_path
elif tokens[0].type == self.t_STRING:
filename = tokens[0].value[1:-1]
path = self.temp_path + [""] + self.path
else:
print("Malformed #include statement")
return
for p in path:
iname = os.path.join(p,filename)
try:
data = open(iname,"r").read()
dname = os.path.dirname(iname)
if dname:
self.temp_path.insert(0,dname)
for tok in self.parsegen(data,filename):
yield tok
if dname:
del self.temp_path[0]
break
except IOError:
pass
else:
print("Couldn't find '%s'" % filename)
# ----------------------------------------------------------------------
# define()
#
# Define a new macro
# ----------------------------------------------------------------------
def define(self,tokens):
if isinstance(tokens,(str,unicode)):
tokens = self.tokenize(tokens)
linetok = tokens
try:
name = linetok[0]
if len(linetok) > 1:
mtype = linetok[1]
else:
mtype = None
if not mtype:
m = Macro(name.value,[])
self.macros[name.value] = m
elif mtype.type in self.t_WS:
# A normal macro
m = Macro(name.value,self.tokenstrip(linetok[2:]))
self.macros[name.value] = m
elif mtype.value == '(':
# A macro with arguments
tokcount, args, positions = self.collect_args(linetok[1:])
variadic = False
for a in args:
if variadic:
print("No more arguments may follow a variadic argument")
break
astr = "".join([str(_i.value) for _i in a])
if astr == "...":
variadic = True
a[0].type = self.t_ID
a[0].value = '__VA_ARGS__'
del a[1:]
continue
elif astr[-3:] == "..." and a[0].type == self.t_ID:
variadic = True
del a[1:]
# If, for some reason, "." is part of the identifier, strip the
# trailing "..." off the name for the purposes of macro expansion
if a[0].value[-3:] == '...':
a[0].value = a[0].value[:-3]
continue
if len(a) > 1 or a[0].type != self.t_ID:
print("Invalid macro argument")
break
else:
mvalue = self.tokenstrip(linetok[1+tokcount:])
i = 0
while i < len(mvalue):
if i+1 < len(mvalue):
if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
del mvalue[i]
continue
elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
del mvalue[i+1]
i += 1
m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
self.macro_prescan(m)
self.macros[name.value] = m
else:
print("Bad macro definition")
except LookupError:
print("Bad macro definition")
# ----------------------------------------------------------------------
# undef()
#
# Undefine a macro
# ----------------------------------------------------------------------
def undef(self,tokens):
id = tokens[0].value
try:
del self.macros[id]
except LookupError:
pass
# ----------------------------------------------------------------------
# parse()
#
# Parse input text.
# ----------------------------------------------------------------------
def parse(self,input,source=None,ignore={}):
self.ignore = ignore
self.parser = self.parsegen(input,source)
# ----------------------------------------------------------------------
# token()
#
# Method to return individual tokens
# ----------------------------------------------------------------------
def token(self):
try:
while True:
tok = next(self.parser)
if tok.type not in self.ignore: return tok
except StopIteration:
self.parser = None
return None
if __name__ == '__main__':
import ply.lex as lex
lexer = lex.lex()
# Run a preprocessor
import sys
f = open(sys.argv[1])
input = f.read()
p = Preprocessor(lexer)
p.parse(input,sys.argv[1])
while True:
tok = p.token()
if not tok: break
print(p.source, tok)

ply/ply/ctokens.py Normal file
View File

@ -0,0 +1,133 @@
# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
# Reserved words
tokens = [
# Literals (identifier, integer constant, float constant, string constant, char const)
'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',
# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
# Increment/decrement (++,--)
'INCREMENT', 'DECREMENT',
# Structure dereference (->)
'ARROW',
# Ternary operator (?)
'TERNARY',
# Delimiters ( ) [ ] { } , . ; :
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
# Ellipsis (...)
'ELLIPSIS',
]
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'
# ->
t_ARROW = r'->'
# ?
t_TERNARY = r'\?'
# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
# Integer literal
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comment (C-Style)
def t_COMMENT(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
return t
# Comment (C++-Style)
def t_CPPCOMMENT(t):
r'//.*\n'
t.lexer.lineno += 1
return t
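# Minimal consumer sketch (editor's addition, not part of ctokens.py):
# a tokenizer module can pull these specifications in wholesale, add the
# few pieces lex still needs, and build a lexer:
#     import ply.lex as lex
#     from ply.ctokens import *
#     tokens = tokens + ['COMMENT', 'CPPCOMMENT']  # comment rules return tokens
#     def t_error(t):
#         t.lexer.skip(1)
#     lexer = lex.lex()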

ply/ply/lex.py Normal file

File diff suppressed because it is too large Load Diff

ply/ply/yacc.py Normal file

File diff suppressed because it is too large Load Diff

ply/setup.py Executable file
View File

@ -0,0 +1,32 @@
#!/usr/bin/python
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
setup(name = "ply",
description="Python Lex & Yacc",
long_description = """
PLY is yet another implementation of lex and yacc for Python. Some notable
features include the fact that it's implemented entirely in Python and it
uses LALR(1) parsing which is efficient and well suited for larger grammars.
PLY provides most of the standard lex/yacc features including support for empty
productions, precedence rules, error recovery, and support for ambiguous grammars.
PLY is extremely easy to use and provides very extensive error checking.
It is compatible with both Python 2 and Python 3.
""",
license="""BSD""",
version = "3.4",
author = "David Beazley",
author_email = "dave@dabeaz.com",
maintainer = "David Beazley",
maintainer_email = "dave@dabeaz.com",
url = "http://www.dabeaz.com/ply/",
packages = ['ply'],
classifiers = [
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 2',
]
)

ply/test/README Normal file
View File

@ -0,0 +1,7 @@
This directory mostly contains tests for various types of error
conditions. To run:
$ python testlex.py .
$ python testyacc.py .
The script 'cleanup.sh' cleans up this directory to its original state.

ply/test/calclex.py Normal file
View File

@ -0,0 +1,49 @@
# -----------------------------------------------------------------------------
# calclex.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lex.lex()

ply/test/cleanup.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__

ply/test/lex_closure.py Normal file
View File

@ -0,0 +1,54 @@
# -----------------------------------------------------------------------------
# lex_closure.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
def make_calc():
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
return lex.lex()
make_calc()
lex.runmain(data="3+4")

ply/test/lex_doc1.py Normal file
View File

@ -0,0 +1,26 @@
# lex_doc1.py
#
# Missing documentation string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER(t):
pass
def t_error(t):
pass
lex.lex()

ply/test/lex_dup1.py Normal file
View File

@ -0,0 +1,29 @@
# lex_dup1.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
t_NUMBER = r'\d+'
def t_error(t):
pass
lex.lex()

ply/test/lex_dup2.py Normal file
View File

@ -0,0 +1,33 @@
# lex_dup2.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER(t):
r'\d+'
pass
def t_NUMBER(t):
r'\d+'
pass
def t_error(t):
pass
lex.lex()

ply/test/lex_dup3.py Normal file
View File

@ -0,0 +1,31 @@
# lex_dup3.py
#
# Duplicated rule specifiers
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
def t_NUMBER(t):
r'\d+'
pass
def t_error(t):
pass
lex.lex()

ply/test/lex_empty.py Normal file
View File

@ -0,0 +1,20 @@
# lex_empty.py
#
# No rules defined
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
lex.lex()

ply/test/lex_error1.py Normal file
View File

@ -0,0 +1,24 @@
# lex_error1.py
#
# Missing t_error() rule
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
lex.lex()

ply/test/lex_error2.py Normal file
View File

@ -0,0 +1,26 @@
# lex_error2.py
#
# t_error defined, but not a function
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
t_error = "foo"
lex.lex()

ply/test/lex_error3.py Normal file
View File

@ -0,0 +1,27 @@
# lex_error3.py
#
# t_error defined as function, but with wrong # args
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
def t_error():
pass
lex.lex()

ply/test/lex_error4.py Normal file
View File

@ -0,0 +1,27 @@
# lex_error4.py
#
# t_error defined as function, but too many args
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
def t_error(t,s):
pass
lex.lex()

ply/test/lex_hedit.py Normal file
View File

@ -0,0 +1,47 @@
# -----------------------------------------------------------------------------
# hedit.py
#
# Parsing of Fortran H Edit descriptors (Contributed by Pearu Peterson)
#
# These tokens can't be easily tokenized because they are of the following
# form:
#
# nHc1...cn
#
# where n is a positive integer and c1 ... cn are characters.
#
# This example shows how to modify the state of the lexer to parse
# such tokens
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'H_EDIT_DESCRIPTOR',
)
# Tokens
t_ignore = " \t\n"
def t_H_EDIT_DESCRIPTOR(t):
r"\d+H.*" # This grabs all of the remaining text
i = t.value.index('H')
n = eval(t.value[:i])
# Adjust the tokenizing position
t.lexer.lexpos -= len(t.value) - (i+1+n)
t.value = t.value[i+1:i+1+n]
return t
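# Worked example (editor's note): for the input "10Habcdefghij 2Hxy" the
# rule first matches the whole remaining line; with i = 2 (the index of
# 'H') and n = 10, lexpos is rewound to just past "abcdefghij", t.value
# becomes "abcdefghij", and lexing resumes at " 2Hxy".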
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lex.lex()
lex.runmain(data="3Habc 10Habcdefghij 2Hxy")

ply/test/lex_ignore.py Normal file
View File

@ -0,0 +1,31 @@
# lex_ignore.py
#
# Improperly specified ignore declaration
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
def t_ignore(t):
' \t'
pass
def t_error(t):
pass
lex.lex()

ply/test/lex_ignore2.py Normal file
View File

@ -0,0 +1,29 @@
# lex_ignore2.py
#
# ignore declaration as a raw string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
t_ignore = r' \t'
def t_error(t):
pass
lex.lex()

ply/test/lex_literal1.py Normal file
View File

@ -0,0 +1,25 @@
# lex_literal1.py
#
# Bad literal specification
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"NUMBER",
]
literals = ["+","-","**"]
def t_NUMBER(t):
r'\d+'
return t
def t_error(t):
pass
lex.lex()

ply/test/lex_literal2.py Normal file
View File

@ -0,0 +1,25 @@
# lex_literal2.py
#
# Bad literal specification
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"NUMBER",
]
literals = 23
def t_NUMBER(t):
r'\d+'
return t
def t_error(t):
pass
lex.lex()

ply/test/lex_many_tokens.py Normal file
View File

@ -0,0 +1,27 @@
# lex_many_tokens.py
#
# Test lex's ability to handle a large number of tokens (beyond the
# 100-group limit of the re module)
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = ["TOK%d" % i for i in range(1000)]
for tok in tokens:
if sys.version_info[0] < 3:
exec("t_%s = '%s:'" % (tok,tok))
else:
exec("t_%s = '%s:'" % (tok,tok), globals())
t_ignore = " \t"
def t_error(t):
pass
lex.lex(optimize=1,lextab="manytab")
lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:")

ply/test/lex_module.py Normal file
View File

@ -0,0 +1,10 @@
# lex_module.py
#
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
import lex_module_import
lex.lex(module=lex_module_import)
lex.runmain(data="3+4")

ply/test/lex_module_import.py Normal file
View File

@ -0,0 +1,42 @@
# -----------------------------------------------------------------------------
# lex_module_import.py
#
# A lexer defined in a module, but built in lex_module.py
# -----------------------------------------------------------------------------
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)

ply/test/lex_object.py Normal file
View File

@ -0,0 +1,55 @@
# -----------------------------------------------------------------------------
# lex_object.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
class CalcLexer:
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self,t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(self,t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self,t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
calc = CalcLexer()
# Build the lexer
lex.lex(object=calc)
lex.runmain(data="3+4")

ply/test/lex_opt_alias.py Normal file
View File

@ -0,0 +1,54 @@
# -----------------------------------------------------------------------------
# lex_opt_alias.py
#
# Tests ability to match up functions with states, aliases, and
# lexing tables.
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
tokens = (
'NAME','NUMBER',
)
states = (('instdef','inclusive'),('spam','exclusive'))
literals = ['=','+','-','*','/', '(',')']
# Tokens
def t_instdef_spam_BITS(t):
r'[01-]+'
return t
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ANY_NUMBER = NUMBER
t_ignore = " \t"
t_spam_ignore = t_ignore
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
t_spam_error = t_error
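# Editor's note: lex matches rules to states by name, so
# t_instdef_spam_BITS applies in both the 'instdef' and 'spam' states,
# t_ANY_NUMBER applies in every state, and plain assignments such as
# t_spam_error and t_spam_ignore reuse one definition under a
# state-qualified name.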
# Build the lexer
import ply.lex as lex
lex.lex(optimize=1,lextab="aliastab")
lex.runmain(data="3+4")

ply/test/lex_optimize.py Normal file
View File

@ -0,0 +1,50 @@
# -----------------------------------------------------------------------------
# lex_optimize.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lex.lex(optimize=1)
lex.runmain(data="3+4")

ply/test/lex_optimize2.py Normal file
View File

@ -0,0 +1,50 @@
# -----------------------------------------------------------------------------
# lex_optimize2.py
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lex.lex(optimize=1,lextab="opt2tab")
lex.runmain(data="3+4")

ply/test/lex_optimize3.py Normal file
View File

@ -0,0 +1,52 @@
# -----------------------------------------------------------------------------
# lex_optimize3.py
#
# Writes table in a subdirectory structure.
# -----------------------------------------------------------------------------
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = (
'NAME','NUMBER',
'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
'LPAREN','RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lex.lex(optimize=1,lextab="lexdir.sub.calctab",outputdir="lexdir/sub")
lex.runmain(data="3+4")

ply/test/lex_re1.py Normal file
View File

@ -0,0 +1,27 @@
# lex_re1.py
#
# Bad regular expression in a string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'(\d+'
def t_error(t):
pass
lex.lex()

ply/test/lex_re2.py Normal file
View File

@ -0,0 +1,27 @@
# lex_re2.py
#
# Regular expression rule matches empty string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+?'
t_MINUS = r'-'
t_NUMBER = r'(\d+)'
def t_error(t):
pass
lex.lex()

ply/test/lex_re3.py Normal file
View File

@ -0,0 +1,29 @@
# lex_re3.py
#
# Regular expression rule matches empty string
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
"POUND",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'(\d+)'
t_POUND = r'#'
def t_error(t):
pass
lex.lex()

ply/test/lex_rule1.py Normal file
View File

@ -0,0 +1,27 @@
# lex_rule1.py
#
# Rule defined as something other than a string or function
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = 1
def t_error(t):
pass
lex.lex()

ply/test/lex_rule2.py Normal file
View File

@ -0,0 +1,29 @@
# lex_rule2.py
#
# Rule function with incorrect number of arguments
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER():
r'\d+'
return t
def t_error(t):
pass
lex.lex()

ply/test/lex_rule3.py Normal file
View File

@ -0,0 +1,27 @@
# lex_rule3.py
#
# Rule function with incorrect number of arguments
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
t_PLUS = r'\+'
t_MINUS = r'-'
def t_NUMBER(t,s):
r'\d+'
return t
def t_error(t):
pass
lex.lex()

ply/test/lex_state1.py Normal file
View File

@ -0,0 +1,40 @@
# lex_state1.py
#
# Bad state declaration
import sys
if ".." not in sys.path: sys.path.insert(0,"..")
import ply.lex as lex
tokens = [
"PLUS",
"MINUS",
"NUMBER",
]
states = 'comment'
t_PLUS = r'\+'
t_MINUS = r'-'
t_NUMBER = r'\d+'
# Comments
def t_comment(t):
r'/\*'
t.lexer.begin('comment')
print("Entering comment state")
def t_comment_body_part(t):
r'(.|\n)*\*/'
print("comment body %s" % t)
t.lexer.begin('INITIAL')
def t_error(t):
pass
lex.lex()

Some files were not shown because too many files have changed in this diff