123 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			123 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import sys
 | |
| import hashlib
 | |
| import cPickle as pickle
 | |
| import os
 | |
| 
 | |
| from pyparsing import Word, CharsNotIn, Optional, OneOrMore, ZeroOrMore, Group, Forward, ParseException, Literal, Suppress, replaceWith, StringEnd, lineno, QuotedString, White, NotAny, ParserElement, MatchFirst
 | |
| 
 | |
class Argument:
    """Optional ``[...]`` argument attached to a TeX command.

    Instances are created from a pyparsing match whose tokens are the
    opening bracket, the text between the brackets, and the closing bracket.
    Only the middle token is stored, in ``self.str``.
    """

    def __init__(self, s, loc, toks):
        # s and loc are part of the parse-action signature but are not used.
        self.str = toks[1]

    def __str__(self):
        """Return the bare argument text, without brackets."""
        return self.str

    def __repr__(self):
        """Return the argument text wrapped in square brackets."""
        return "[%s]" % self.str
 | |
def argfun(s, loc, toks):
    """Parse action: turn a matched ``[...]`` token group into an Argument."""
    wrapped = Argument(s, loc, toks)
    return wrapped
 | |
| 
 | |
class Parameter:
    """Braced ``{...}`` parameter attached to a TeX command.

    Despite its name, ``self.str`` is a *list*: the pieces found between the
    braces, obtained from ``toks[0].asList()``.  Each piece is converted with
    ``str()`` when the parameter is rendered.
    """

    def __init__(self, s, loc, toks):
        # s and loc are part of the parse-action signature but are not used.
        self.str = toks[0].asList()

    def __str__(self):
        """Return the pieces concatenated, without the surrounding braces."""
        return "".join([str(piece) for piece in self.str])

    def __repr__(self):
        """Return the pieces concatenated and wrapped in braces."""
        # Reuses __str__ so both renderings stay in sync.  The original had a
        # second, unreachable ``return "{%s}" % self.str`` here; it is removed.
        return '{' + self.__str__() + '}'
 | |
def paramfun(s, loc, toks):
    """Parse action: turn a matched ``{...}`` token group into a Parameter."""
    wrapped = Parameter(s, loc, toks)
    return wrapped
 | |
| 
 | |
class TexCmd:
    """A parsed TeX command together with its arguments and parameters.

    Attributes set by the constructor:
        cmd:      first token as a string with its first character (the
                  backslash) removed.
        args:     ``toks[1].asList()``.
        params:   ``toks[2].asList()``.
        lineno:   line number of the match, computed by pyparsing's
                  ``lineno(loc, s)``.
        filename: always None here; left for the caller to fill in.
    """

    def __init__(self, s, loc, toks):
        name_tok, arg_toks, param_toks = toks[0], toks[1], toks[2]
        self.cmd = str(name_tok)[1:]
        self.args = arg_toks.asList()
        self.params = param_toks.asList()
        self.lineno = lineno(loc, s)
        self.filename = None

    def __repr__(self):
        """Render as backslash + name + repr of each argument and parameter."""
        pieces = ['\\' + self.cmd]
        pieces.extend([repr(a) for a in self.args])
        pieces.extend([repr(p) for p in self.params])
        return "".join(pieces)
 | |
| 
 | |
class ZeroOrMoreAsList(ZeroOrMore):
    """ZeroOrMore whose match is delivered as one nested token.

    The attached parse action returns ``[toks]``, i.e. the matched tokens
    wrapped in a one-element list, so the repetition contributes a single
    item to the enclosing result even when it matched nothing.
    """

    def __init__(self, *args):
        super(ZeroOrMoreAsList, self).__init__(*args)
        # Same (s, loc, toks) signature as a named parse action.
        self.setParseAction(lambda s, loc, toks: [toks])
 | |
| 
 | |
# Only newlines and tabs are skipped between tokens; spaces stay part of the
# text.  This must run before any of the grammar elements below are created.
ParserElement.setDefaultWhitespaceChars("\n\t")
backslash = "\\"

# Forward declaration: parameters may contain commands, and commands take
# parameters.
texcmd = Forward()

# Runs of ordinary text.  filler2 additionally stops at braces and is the
# variant used inside {...}.
filler = CharsNotIn(backslash + '$')
filler2 = CharsNotIn(backslash + '${}')

# Optional argument: [ text-without-closing-bracket ]
arg = ('[' + CharsNotIn("]") + ']').setParseAction(argfun)

# Inline math, kept verbatim including the surrounding dollar signs.
dollarmath = QuotedString('$', multiline=True, unquoteResults=False)

# Parameter: { ... } whose content is any mix of math, text, a brace-quoted
# group, or commands.  The braces themselves are dropped from the result.
param = (
    Literal('{').suppress()
    + ZeroOrMoreAsList(
        dollarmath
        | filler2
        | QuotedString('{', endQuoteChar='}', unquoteResults=False)
        | texcmd
    )
    + Literal('}').suppress()
).setParseAction(paramfun)
 | |
def bs(c):
    """Return a Literal that matches a backslash immediately followed by ``c``."""
    escaped = "\\" + c
    return Literal(escaped)
 | |
# Backslash followed by one special character; alternatives are tried in
# the order listed.
singles = MatchFirst(list(map(bs, (
    "[", "]", "{", "}", "\\", "&", "_", ",",
    "#", "\n", ";", "|", "%", "*", "~", "^",
))))

# A command is one of the escapes above, or a backslash followed by at least
# one letter or digit; then any number of [..] arguments; then any number of
# {..} parameters.
texcmd << (
    (singles | Word("\\", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", min=2))
    + ZeroOrMoreAsList(arg)
    + ZeroOrMoreAsList(param)
)
 | |
def texcmdfun(s, loc, toks):
    """Parse action: turn a matched command token group into a TexCmd."""
    command = TexCmd(s, loc, toks)
    return command
 | |
texcmd.setParseAction(texcmdfun)

# Earlier definition of filler (printable ASCII except backslash), kept for
# reference only:
#   legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)])
#   filler = Word(legal)

# A document is any sequence of inline math, commands and plain text, and
# the whole input must be consumed.
document = (
    ZeroOrMore(MatchFirst([dollarmath, texcmd, filler]))
    + Suppress(StringEnd())
)
 | |
| 
 | |
| if 0:
 | |
|     s = "This is \\\\ test"
 | |
|     print s
 | |
|     for t in document.parseString(s):
 | |
|         if isinstance(t, TexCmd):
 | |
|             print '====> cmd=[%s]' % t.cmd, t
 | |
|         else:
 | |
|             print '====>', t
 | |
|     sys.exit(-1)
 | |
| 
 | |
# Hash state seeded with this module's own source.  tokenize() copies it and
# adds the document text, so a change to the parser changes every cache key.
# NOTE(review): under Python 2, __file__ may name the compiled .pyc rather
# than the .py when the module is loaded from bytecode -- confirm whether the
# digest should be pinned to the source file.
with open(__file__) as _self_file:      # closed promptly instead of leaked
    selfstr = _self_file.read()         # Own source as a string.  Used as part of hash.
hashbase = hashlib.md5(selfstr)
 | |
| 
 | |
def tokenize(filename):
    """Parse one LaTeX file into a token list, using an on-disk cache.

    The file is read line by line with comments stripped.  The remaining
    text is hashed on top of ``hashbase`` and the hex digest names a pickle
    file under ``parse-cache/``.  If that file can be loaded its content is
    returned.  Otherwise the text is parsed with ``document``, every
    top-level TexCmd that has no filename yet is tagged with ``filename``,
    and the result is pickled to the cache and returned.

    If parsing fails with a ParseException, the location and offending line
    are printed and the process exits with status 1.
    """

    def uncomment(line):
        """Cut ``line`` at its first unescaped '%', keeping a newline."""
        pos = line.find('%')
        while pos != -1:
            # A '%' preceded by an odd number of backslashes is the literal
            # \% ; an even number (including zero) means a real comment.
            backslashes = 0
            while backslashes < pos and line[pos - 1 - backslashes] == '\\':
                backslashes += 1
            if backslashes % 2 == 0:
                return line[:pos] + '\n'
            pos = line.find('%', pos + 1)
        return line

    with open(filename, "rt") as f:
        docstr = "".join([uncomment(l) for l in f])

    digest = hashbase.copy()
    digest.update(docstr)
    cache_filename = os.path.join("parse-cache", digest.hexdigest())

    try:
        # NOTE: unpickling executes code.  The cache directory must only
        # ever contain files written by this function.
        with open(cache_filename, 'rb') as cached:
            return pickle.load(cached)
    except (IOError, EOFError):
        # Missing, unreadable or empty cache entry: fall through and parse.
        pass

    print("parsing")
    try:
        r = document.parseString(docstr)
    except ParseException as pe:
        print('Fatal problem at %s line %d col %d' % (filename, pe.lineno, pe.col))
        print(pe.line)
        sys.exit(1)

    for x in r:
        if isinstance(x, TexCmd) and not x.filename:
            x.filename = filename

    # Create the cache directory on first use instead of failing with IOError.
    cache_dir = os.path.dirname(cache_filename)
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(cache_filename, 'wb') as out:
        pickle.dump(r, out)
    return r
 | |
| 
 | |
def latexparser(filename):
    """Tokenize ``filename`` and splice in the tokens of each ``\\input`` file.

    Every top-level TexCmd named ``input`` is replaced by the recursively
    parsed tokens of ``"../" + <first piece of its first parameter> + ".tex"``.
    The included path is printed as it is encountered.  All other tokens are
    kept in order.

    Returns a flat list of tokens.
    """
    tokens = tokenize(filename)
    result = []
    for t in tokens:
        if isinstance(t, TexCmd) and t.cmd == "input":
            included = "../" + str(t.params[0].str[0]) + ".tex"
            print(included)
            # extend() keeps flattening linear; the original used
            # sum(list_of_lists, []), which copies the accumulated list on
            # every step.
            result.extend(latexparser(included))
        else:
            result.append(t)
    return result
 | 
