 c95eb57405
			
		
	
	c95eb57405
	
	
	
		
			
			And fix the scripts so they stop letting trailing whitespace through. Change-Id: Ie109fbe1f63321e565ba0fa60fee8e9cf3a61cfc
		
			
				
	
	
		
			2276 lines
		
	
	
		
			73 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			2276 lines
		
	
	
		
			73 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # a glorified C pre-processor parser
 | |
| 
 | |
| import sys, re, string
 | |
| from utils import *
 | |
| from defaults import *
 | |
| 
 | |
# debugging switches, all off by default.
# debugTokens makes the tokenizer print every token it produces; the other
# flags are presumably read by later stages of this file (not checked here).
debugTokens = False
debugDirectiveTokenizer = False
debugLineParsing = False
debugCppExpr = False
debugOptimIf01 = False
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####           C P P   T O K E N S                                             #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
# token ids understood by the tokenizer: the C-preprocessor tokens plus a
# couple of plain C tokens. for symbols the id is the symbol text itself.

# end markers and whitespace
tokEOF = "\0"
tokLN = "\n"
tokSPACE = " "

# preprocessor-specific operators
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokDEFINED = "defined"

# logical, shift and comparison operators
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokNOT = "!"

# arithmetic and bitwise operators
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokINCREMENT = "++"
tokDECREMENT = "--"

# punctuation
tokELLIPSIS = "..."
tokLPAREN = "("
tokRPAREN = ")"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"

# token classes: the id names the class, the token value holds the text
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
 | |
| 
 | |
class Token:
    """a small record describing one token.

       a token carries its position in the source text (lineno/colno)
       together with an 'id' and a 'value'. the id is a string naming
       the token's class; the value is the text of the original token.

       for example, the tokenizer folds a run of spaces and tabs into a
       single token whose id is tokSPACE and whose value is the original
       spaces+tabs sequence."""

    def __init__(self):
        self.id = self.value = None
        self.lineno = self.colno = 0

    def set(self,id,val=None):
        """store a new id and value; a missing or empty 'val' makes the
           value default to the id itself. always returns None."""
        self.id = id
        self.value = val if val else id
        return None

    def copyFrom(self,src):
        """copy id, value and position from another token"""
        self.id, self.value = src.id, src.value
        self.lineno, self.colno = src.lineno, src.colno

    def __repr__(self):
        kind, text = self.id, self.value

        # token classes are shown together with their text
        for tag, fmt in ((tokIDENT,  "(ident %s)"),
                         (tokNUMBER, "(number %s)"),
                         (tokSTRING, "(string '%s')")):
            if kind == tag:
                return fmt % text

        if kind == tokLN:
            return "<LN>"
        if kind == tokEOF:
            return "<EOF>"
        if kind == tokSPACE and text == "\\":
            # a trailing \ that the tokenizer turned into a tokSPACE
            return "<\\>"

        return kind

    def __str__(self):
        kind = self.id

        if kind in (tokIDENT, tokNUMBER, tokSTRING):
            return self.value
        if kind == tokEOF:
            return "<EOF>"
        if kind == tokSPACE:
            if self.value != "\\":
                return self.value
            # trailing \ : give back the backslash and the newline it ate
            return "\\\n"

        return kind
 | |
| 
 | |
class BadExpectedToken(Exception):
    """raised when a token other than the expected one is found.

       the message is also printed on stdout at construction time, as
       before."""
    def __init__(self,msg):
        Exception.__init__(self, msg)
        # call form: parses as a statement in Python 2 and as a function
        # call in Python 3, and prints the same text in both.
        print(msg)
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####          C P P   T O K E N   C U R S O R                                  #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
class TokenCursor:
    """a small class to iterate over a list of Token objects"""
    def __init__(self,tokens):
        self.tokens = tokens
        self.n      = 0            # index of the current token
        self.count  = len(tokens)  # number of tokens, fixed at construction

    def set(self,n):
        """set the current position, clamped to the range [0, count]"""
        if n < 0:
            n = 0
        if n > self.count:
            n = self.count
        self.n = n

    def peekId(self):
        """retrieve the id of the current token, or None past the end"""
        if self.n >= self.count:
            return None
        return self.tokens[self.n].id

    def peek(self):
        """retrieve the current token, or None past the end.
           does not change position"""
        if self.n >= self.count:
            return None
        return self.tokens[self.n]

    def skip(self):
        """increase current token position; does nothing at the end"""
        if self.n < self.count:
            self.n += 1

    def skipSpaces(self):
        """skip over all space tokens, this includes tokSPACE and tokLN"""
        while 1:
            tok = self.peekId()
            if tok != tokSPACE and tok != tokLN:
                break
            self.skip()

    def skipIfId(self,id):
        """skip an optional token"""
        if self.peekId() == id:
            self.skip()

    def expectId(self,id):
        """raise BadExpectedToken if the current token hasn't the given id
           (or if there is no current token). otherwise skip over it"""
        tok = self.peek()
        if tok is None:
            # past the last token there is nothing to read .lineno/.id from;
            # report it as a parse error rather than an AttributeError
            raise BadExpectedToken("'%s' expected, received end of input" % id)
        if tok.id != id:
            raise BadExpectedToken("%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id))
        self.skip()

    def remain(self):
        """return the list of remaining tokens"""
        return self.tokens[self.n:]
 | |
| 
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####           C P P   T O K E N I Z E R                                       #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
# list of long symbols, i.e. those that take more than one character.
# the tokenizer walks this list in order and takes the first match.
cppLongSymbols = [
    tokCONCAT,
    tokLOGICAND,
    tokLOGICOR,
    tokSHL,
    tokSHR,
    tokELLIPSIS,
    tokEQUAL,
    tokNEQUAL,
    tokLTE,
    tokGTE,
    tokARROW,
    tokINCREMENT,
    tokDECREMENT,
]
 | |
| 
 | |
class CppTokenizer:
    """an abstract class used to convert some input text into a list
       of tokens. real implementations follow and differ in the format
       of the input text only"""

    def __init__(self):
        """initialize a new CppTokenizer object"""
        self.eof  = False  # end of file reached ?
        self.text = None   # content of current line, with final \n stripped
        self.line = 0      # number of current line
        self.pos  = 0      # current character position in current line
        self.len  = 0      # length of current line text
        self.held = Token()

    def setLineText(self,line):
        """set the content of the (next) current line. should be called
           by fillLineText() in derived classes"""
        self.text = line
        self.len  = len(line)
        self.pos  = 0

    def fillLineText(self):
        """refresh the content of 'line' with a new line of input"""
        # to be overridden; the base class has no input, so it just
        # reports end of file
        self.eof = True

    def markPos(self,tok):
        """mark the position of the current token in the source file"""
        if self.eof or self.pos > self.len:
            # the cursor already moved past the line's end marker: the
            # next token starts on the following line
            tok.lineno = self.line + 1
            tok.colno  = 0
        else:
            tok.lineno = self.line
            tok.colno  = self.pos

    def peekChar(self):
        """return the current character under the cursor without moving it.
           returns tokLN at the end of a line and tokEOF at end of input"""
        if self.eof:
            return tokEOF

        if self.pos > self.len:
            # the end-of-line position was consumed: load the next line
            self.pos   = 0
            self.line += 1
            self.fillLineText()
            if self.eof:
                return tokEOF

        if self.pos == self.len:
            return tokLN
        else:
            return self.text[self.pos]

    def peekNChar(self,n):
        """try to peek the next n chars on the same line; returns None
           when fewer than n chars remain on it"""
        if self.pos + n > self.len:
            return None
        return self.text[self.pos:self.pos+n]

    def skipChar(self):
        """increment the character cursor position (no-op at eof)"""
        if not self.eof:
            self.pos += 1

    def skipNChars(self,n):
        """skip n characters"""
        if self.pos + n <= self.len:
            self.pos += n
        else:
            while n > 0:
                self.skipChar()
                n -= 1

    def nextChar(self):
        """retrieve the character at the current cursor position, then skip it"""
        result = self.peekChar()
        self.skipChar()
        return  result

    def _takeDigits(self,n,alphabet):
        """if the next n chars of the current line all belong to 'alphabet',
           consume and return them. otherwise consume nothing and return "" """
        chunk = self.peekNChar(n)
        if chunk is None:
            return ""
        for d in chunk:
            if d not in alphabet:
                return ""
        self.skipNChars(n)
        return chunk

    def getEscape(self):
        """read what follows a backslash (\\): one character, plus the
           digits of an octal (0ddd), hex (xhh) or unicode (uhhhh) escape
           when they are all present on the current line"""
        hexDigits = "0123456789abcdefABCDEF"   # the digit '9' was missing before
        result = self.nextChar()
        if result == "0":
            # octal number ?
            result += self._takeDigits(3, "01234567")
        elif result == "x" or result == "X":
            # hex number ?
            result += self._takeDigits(2, hexDigits)
        elif result == "u" or result == "U":
            # unicode char ?
            result += self._takeDigits(4, hexDigits)

        return result

    def nextRealToken(self,tok):
        """return next CPP token, used internally by nextToken().
           the result is stored in 'tok'; the return value is always None"""
        c = self.nextChar()
        if c == tokEOF or c == tokLN:
            return tok.set(c)

        if c == '/':
            c = self.peekChar()
            if c == '/':   # C++ comment line
                self.skipChar()
                while 1:
                    c = self.nextChar()
                    if c == tokEOF or c == tokLN:
                        break
                # the comment text is dropped, only the line end is reported
                return tok.set(tokLN)
            if c == '*':   # C comment start
                self.skipChar()
                value = "/*"
                prev_c = None
                while 1:
                    c = self.nextChar()
                    if c == tokEOF:
                        # unterminated comment
                        return tok.set(tokEOF,value)
                    if c == '/' and prev_c == '*':
                        break
                    prev_c = c
                    value += c

                value += "/"
                # a C comment counts as white space; its text is kept as value
                return tok.set(tokSPACE,value)
            c = '/'

        if c.isspace():
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or not c2.isspace():
                    break
                c += c2
                self.skipChar()
            return tok.set(tokSPACE,c)

        if c == '\\':
            if debugTokens:
                print("nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar()))
            if self.peekChar() == tokLN:   # trailing \
                # eat the tokLN
                self.skipChar()
                # we replace a trailing \ by a tokSPACE whose value is
                # simply "\\". this allows us to detect them later when
                # needed.
                return tok.set(tokSPACE,"\\")
            else:
                # treat as a single token here ?
                c +=self.getEscape()
                return tok.set(c)

        if c == "'":  # chars
            c2 = self.nextChar()
            c += c2
            if c2 == '\\':
                c += self.getEscape()

            while 1:
                c2 = self.nextChar()
                if c2 == tokEOF:
                    break
                c += c2
                if c2 == "'":
                    break

            return tok.set(tokSTRING, c)

        if c == '"':  # strings
            quote = 0   # set when the previous char was an unescaped backslash
            while 1:
                c2  = self.nextChar()
                if c2 == tokEOF:
                    return tok.set(tokSTRING,c)

                c += c2
                if not quote:
                    if c2 == '"':
                        return tok.set(tokSTRING,c)
                    if c2 == "\\":
                        quote = 1
                else:
                    quote = 0

        if c >= "0" and c <= "9":  # integers ?
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            return tok.set(tokNUMBER,c)

        if c.isalnum() or c == "_":  # identifiers ?
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            if c == tokDEFINED:
                return tok.set(tokDEFINED)
            else:
                return tok.set(tokIDENT,c)

        # check special symbols
        for sk in cppLongSymbols:
            if c == sk[0]:
                sklen = len(sk[1:])
                if self.pos + sklen <= self.len and \
                   self.text[self.pos:self.pos+sklen] == sk[1:]:
                    self.pos += sklen
                    return tok.set(sk)

        return tok.set(c)

    def nextToken(self,tok):
        """return the next token from the input text. this function
           really updates 'tok', and does not return a new one"""
        self.markPos(tok)
        self.nextRealToken(tok)

    def getToken(self):
        """read the next token into a new Token object and return it"""
        tok = Token()
        self.nextToken(tok)
        if debugTokens:
            print("getTokens: %s" % repr(tok))
        return tok

    def toTokenList(self):
        """convert the input text of a CppTokenizer into a direct
           list of token objects. tokEOF is stripped from the result"""
        result = []
        while 1:
            tok = Token()
            self.nextToken(tok)
            if tok.id == tokEOF:
                break
            result.append(tok)
        return result
 | |
| 
 | |
class CppLineTokenizer(CppTokenizer):
    """a CppTokenizer that reads its input from one single line of text"""
    def __init__(self,line,lineno=1):
        """'line' is the text to tokenize, 'lineno' the line number
           recorded for it"""
        CppTokenizer.__init__(self)
        self.setLineText(line)
        self.line = lineno
 | |
| 
 | |
| 
 | |
class CppLinesTokenizer(CppTokenizer):
    """a CppTokenizer derived class that accepts a list of text lines as input.
       the lines must not have a trailing \n"""
    def __init__(self,lines=None,lineno=1):
        """initialize a CppLinesTokenizer. you can later add lines using addLine().

           'lines' is the list of input lines; when given, it is used as-is
           (not copied), so addLine() appends to the caller's list.
           'lineno' is the number given to the first line."""
        CppTokenizer.__init__(self)
        if lines is None:
            # a fresh list per instance: a shared default list would be
            # mutated by addLine() and leak lines between tokenizers
            lines = []
        self.line  = lineno
        self.lines = lines
        self.index = 0           # index of the next line to load
        self.count = len(lines)

        if self.count > 0:
            self.fillLineText()
        else:
            self.eof = True

    def addLine(self,line):
        """add a line to a CppLinesTokenizer. this can be done after tokenization
           happens"""
        if self.count == 0:
            # first line ever: make it the current line right away
            self.setLineText(line)
            self.index = 1
        self.lines.append(line)
        self.count += 1
        self.eof    = False

    def fillLineText(self):
        """load the next stored line, or flag end of file when none is left"""
        if self.index < self.count:
            self.setLineText(self.lines[self.index])
            self.index += 1
        else:
            self.eof = True
 | |
| 
 | |
| 
 | |
class CppFileTokenizer(CppTokenizer):
    """a CppTokenizer that pulls its lines from a file-like object
       through readline()"""
    def __init__(self,file,lineno=1):
        CppTokenizer.__init__(self)
        self.line = lineno
        self.file = file

    def fillLineText(self):
        """read one more line from the file; an empty read means end of file"""
        raw = self.file.readline()
        if len(raw) == 0:
            self.eof = True
            return

        # drop the line terminator: one "\n", then one "\r" if present
        if raw.endswith('\n'):
            raw = raw[:-1]
        if raw.endswith("\r"):
            raw = raw[:-1]
        self.setLineText(raw)
 | |
| 
 | |
| # Unit testing
 | |
| #
 | |
class CppTokenizerTester:
    """a class used to test CppTokenizer classes"""
    def __init__(self,tokenizer=None):
        self.tokenizer = tokenizer
        self.token     = Token()   # reused for every token that is read

    def setTokenizer(self,tokenizer):
        """replace the tokenizer under test"""
        self.tokenizer = tokenizer

    def expect(self,id):
        """read the next token and raise BadExpectedToken unless its id is
           'id', or it is an identifier/number whose text is 'id'"""
        self.tokenizer.nextToken(self.token)
        tokid = self.token.id
        if tokid == id:
            return
        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
            return
        # call-form raise: valid in both Python 2 and Python 3
        raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id))

    def expectToken(self,id,line,col):
        """like expect(), and also check the token's line and column"""
        self.expect(id)
        if self.token.lineno != line:
            raise BadExpectedToken("###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line))
        if self.token.colno != col:
            raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col))

    def expectTokenVal(self,id,value,line,col):
        """like expectToken(), and also check the token's value"""
        self.expectToken(id,line,col)
        if self.token.value != value:
            raise BadExpectedToken("###  BAD VALUE: '%s' expecting '%s'" % (self.token.value,value))

    def expectList(self,list):
        """expect() each item of 'list' in turn"""
        for item in list:
            self.expect(item)
 | |
| 
 | |
def test_CppTokenizer():
    """run the tokenizer self-tests. returns True when all pass; a failed
       expectation raises BadExpectedToken"""
    # call form: parses under both Python 2 and Python 3
    print("running CppTokenizer tests")
    tester = CppTokenizerTester()

    # symbols, identifiers and a number on a single line
    tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy",
                        tokRPAREN, tokLN, tokEOF] )

    # 'defined' gets its own token id
    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )

    # a C comment spanning several lines is one single space token
    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )

    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )

    # line and column tracking
    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
    tester.expectToken( "first", 1, 0 )
    tester.expectToken( tokSPACE, 1, 5 )
    tester.expectToken( "second", 1, 6 )
    tester.expectToken( tokLN, 1, 12 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "third", 2, 2 )

    # the comment token keeps its text and its starting position
    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
    tester.expectList( [ "boo", tokSPACE ] )
    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
    tester.expectList( [ tokLN, tokEOF ] )

    # a trailing backslash becomes a tokSPACE whose value is "\\"
    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
    tester.expectToken( "an", 1, 0 )
    tester.expectToken( tokSPACE, 1, 2 )
    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "example", 2, 1 )
    tester.expectToken( tokLN, 2, 8 )

    return True
 | |
| 
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####           C P P   E X P R E S S I O N S                                   #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
| # Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc..
 | |
| # op is an "operator" string
 | |
| 
 | |
class Expr:
    """a class used to model a CPP expression.

       each expression carries an 'op' string naming its operator; the
       class attributes below list the known operator names."""
    opInteger   = "int"
    opIdent     = "ident"
    opCall      = "call"
    opDefined   = "defined"
    opTest      = "?"
    opLogicNot  = "!"
    opNot       = "~"
    opNeg       = "[-]"
    opUnaryPlus = "[+]"
    opAdd = "+"
    opSub = "-"
    opMul = "*"
    opDiv = "/"
    opMod = "%"
    opAnd = "&"
    opOr  = "|"
    opXor = "^"
    opLogicAnd = "&&"
    opLogicOr  = "||"
    opEqual = "=="
    opNotEqual = "!="
    opLess = "<"
    opLessEq = "<="
    opGreater = ">"
    opGreaterEq = ">="
    opShl = "<<"
    opShr = ">>"

    unaries  = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
    # the shift operators take two operands too and were missing here
    binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
                 opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq,
                 opShl, opShr ]

    # higher number = binds tighter, following C's operator precedence.
    # level 2 used to be keyed by opLogicNot, which the entry at level 11
    # silently replaced, leaving "&&" without any precedence. as in C,
    # "&&" sits between "||" and "|".
    precedences = {
                    opTest: 0,
                    opLogicOr:  1,
                    opLogicAnd: 2,
                    opOr : 3,
                    opXor: 4,
                    opAnd: 5,
                    opEqual: 6, opNotEqual: 6,
                    opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
                    opShl:8, opShr:8,
                    opAdd:9, opSub:9,
                    opMul:10, opDiv:10, opMod:10,
                    opLogicNot:11,
                    opNot: 12,
                    }

    def __init__(self,op):
        self.op = op

    def __repr__(self):
        return "(%s)" % self.op

    def __str__(self):
        return "operator(%s)" % self.op

    def precedence(self):
        """return the precedence of this expression's operator. operators
           missing from the table get 1000, i.e. bind tighter than all
           the listed ones"""
        return Expr.precedences.get(self.op, 1000)

    def isUnary(self):
        """true if the operator is one of Expr.unaries"""
        return self.op in Expr.unaries

    def isBinary(self):
        """true if the operator is one of Expr.binaries"""
        return self.op in Expr.binaries

    def isDefined(self):
        """true if this is a defined(...) expression"""
        # opDefined only exists as a class attribute (a bare 'opDefined'
        # raised NameError), and strings are compared by value
        return self.op == Expr.opDefined

    def toInt(self):
        """return the integer value of a given expression. only valid for integer expressions
           will return None otherwise"""
        return None
 | |
| 
 | |
class IntExpr(Expr):
    """an integer literal. 'arg' holds the literal's source text"""
    def __init__(self,value):
        # opInteger only exists as a class attribute of Expr; the bare
        # name raised NameError
        Expr.__init__(self,Expr.opInteger)
        self.arg   = value

    def __repr__(self):
        return "(int %s)" % self.arg

    def __str__(self):
        return self.arg

    def toInt(self):
        """return the value of the literal as a Python integer.

           U/L suffixes are ignored. following C, a 0x/0X prefix means
           hexadecimal and any other leading 0 means octal. raises
           ValueError if the text is not a valid number."""
        s = self.arg  # string value
        # get rid of U or L suffixes
        while len(s) > 0 and s[-1] in "LUlu":
            s = s[:-1]
        if s[:2] in ("0x", "0X"):
            return int(s, 16)
        if len(s) > 1 and s[0] == "0":
            return int(s, 8)
        return int(s)
 | |
| 
 | |
class IdentExpr(Expr):
    """an identifier. 'name' holds the identifier's text"""
    def __init__(self,name):
        # opIdent only exists as a class attribute of Expr; the bare
        # name raised NameError
        Expr.__init__(self,Expr.opIdent)
        self.name = name

    def __repr__(self):
        return "(ident %s)" % self.name

    def __str__(self):
        return self.name
 | |
| 
 | |
class CallExpr(Expr):
    """a function-like call: 'funcname' applied to the list 'params'"""
    def __init__(self,funcname,params):
        # opCall only exists as a class attribute of Expr; the bare
        # name raised NameError
        Expr.__init__(self,Expr.opCall)
        self.funcname = funcname
        self.params   = params

    def __repr__(self):
        # parameters are shown with repr(), separated by commas
        return "(call %s [%s])" % (self.funcname,
                                   ",".join([repr(param) for param in self.params]))

    def __str__(self):
        # parameters are shown with str(), separated by commas
        return "%s(%s)" % (self.funcname,
                           ",".join([str(param) for param in self.params]))
 | |
| 
 | |
class TestExpr(Expr):
    """a conditional expression: cond ? iftrue : iffalse"""
    def __init__(self,cond,iftrue,iffalse):
        # opTest only exists as a class attribute of Expr; the bare
        # name raised NameError
        Expr.__init__(self,Expr.opTest)
        self.cond    = cond
        self.iftrue  = iftrue
        self.iffalse = iffalse

    def __repr__(self):
        return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))

    def __str__(self):
        # every part is parenthesized, whatever its precedence
        return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
 | |
| 
 | |
class SingleArgExpr(Expr):
    """an expression made of an operator and one argument, kept in 'arg'"""
    def __init__(self,op,arg):
        Expr.__init__(self,op)
        self.arg = arg

    def __repr__(self):
        # %r formats the argument through repr()
        return "(%s %r)" % (self.op, self.arg)
 | |
| 
 | |
class DefinedExpr(SingleArgExpr):
    """a defined(MACRO) test. 'arg' holds the macro name"""
    def __init__(self,op,macroname):
        # 'op' is kept for signature compatibility but ignored: the
        # operator is always Expr.opDefined. the base initializer used to
        # be called without 'self', which raised TypeError on every
        # construction.
        SingleArgExpr.__init__(self,Expr.opDefined,macroname)

    def __str__(self):
        return "defined(%s)" % self.arg
 | |
| 
 | |
| 
 | |
class UnaryExpr(SingleArgExpr):
    """a unary operator applied to one argument. 'opstr' is the text
       used to print the operator; it defaults to 'op'"""
    def __init__(self,op,arg,opstr=None):
        SingleArgExpr.__init__(self,op,arg)
        self.opstr = opstr or op

    def __str__(self):
        text = str(self.arg)
        inner_prec = self.arg.precedence()
        outer_prec = self.precedence()
        # an argument that binds less tightly than the operator needs
        # parentheses
        if inner_prec < outer_prec:
            text = "(%s)" % text
        return "%s%s" % (self.opstr, text)
 | |
| 
 | |
class TwoArgExpr(Expr):
    """an expression made of an operator and two arguments, kept in
       'arg1' and 'arg2'"""
    def __init__(self,op,arg1,arg2):
        Expr.__init__(self,op)
        self.arg1, self.arg2 = arg1, arg2

    def __repr__(self):
        # %r formats each argument through repr()
        return "(%s %r %r)" % (self.op, self.arg1, self.arg2)
 | |
| 
 | |
class BinaryExpr(TwoArgExpr):
    """a binary operator applied to two arguments. 'opstr' is the text
       used to print the operator; it defaults to 'op'"""
    def __init__(self,op,arg1,arg2,opstr=None):
        TwoArgExpr.__init__(self,op,arg1,arg2)
        self.opstr = opstr or op

    def __str__(self):
        lhs, rhs = str(self.arg1), str(self.arg2)
        lhs_prec, rhs_prec = self.arg1.precedence(), self.arg2.precedence()
        own_prec = self.precedence()

        # an operand that binds less tightly than this operator needs
        # parentheses
        if lhs_prec < own_prec:
            lhs = "(%s)" % lhs
        if rhs_prec < own_prec:
            rhs = "(%s)" % rhs

        return "%s %s %s" % (lhs, self.opstr, rhs)
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####           C P P   E X P R E S S I O N   P A R S E R                       #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
| 
 | |
class ExprParser:
    """a class used to convert a list of tokens into a cpp Expr object

       the parser keeps a cursor (self.i) into the token list (self.tok);
       mark()/release() save and restore that cursor.

       NOTE(review): this class is still a skeleton -- is_call_or_ident() and
       parse() are unimplemented placeholders."""

    # group 1 captures the literal itself. the parentheses used to be escaped,
    # which made them match literal '(' / ')' characters and left the patterns
    # without any group, so every 'm.end(1)' raised an IndexError.
    re_octal       = re.compile(r"\s*(0[0-7]+).*")
    re_decimal     = re.compile(r"\s*(\d+[ulUL]*).*")
    re_hexadecimal = re.compile(r"\s*(0[xX][0-9a-fA-F]*).*")

    def __init__(self,tokens):
        self.tok = tokens
        self.n   = len(self.tok)
        self.i   = 0

    def mark(self):
        """return the current cursor position, to be handed back to release()"""
        return self.i

    def release(self,pos):
        """move the cursor back to a position previously returned by mark()"""
        self.i = pos

    def peekId(self):
        """return the id of the current token, or None at the end of the input"""
        if self.i < self.n:
            return self.tok[self.i].id
        return None

    def peek(self):
        """return the current token, or None at the end of the input"""
        if self.i < self.n:
            return self.tok[self.i]
        return None

    def skip(self):
        """advance the cursor by one token, unless already at the end"""
        if self.i < self.n:
            self.i += 1

    def skipOptional(self,id):
        """advance the cursor only if the current token has the given id"""
        if self.i < self.n and self.tok[self.i].id == id:
            self.i += 1

    def skipSpaces(self):
        """advance the cursor past any run of space and newline tokens"""
        i   = self.i
        n   = self.n
        tok = self.tok
        # compare token ids like every other method does; the token objects
        # themselves were previously compared against the id constants
        while i < n and (tok[i].id == tokSPACE or tok[i].id == tokLN):
            i += 1
        self.i = i

    def throw(self,exception,msg):
        """print 'msg' and raise 'exception' (same convention as CppExpr.throw)"""
        print("%s" % msg)
        raise exception

    # all the is_xxx functions return an (expr,nextpos) pair if a match is
    # found, or None if not. 'nextpos' is the index of the token that follows
    # the matched construct.

    def is_integer(self):
        """match an integer literal at the cursor; the cursor is not moved"""
        if self.i >= self.n:
            return None

        tok_id = self.tok[self.i].id
        if not tok_id:
            return None
        c = tok_id[0]
        if c < '0' or c > '9':
            return None

        # hexadecimal must be tried before decimal: the decimal pattern would
        # otherwise accept the leading "0" of "0x..." and hide the hex literal
        for regex in (ExprParser.re_hexadecimal,
                      ExprParser.re_octal,
                      ExprParser.re_decimal):
            if regex.match(tok_id):
                return (IntExpr(tok_id), self.i+1)

        return None

    def is_defined(self):
        """match 'defined MACRO' or 'defined(MACRO)' at the cursor.

           on success the cursor is left right after the construct. raises
           BadExpectedToken (through throw) when 'defined' is not followed
           by a well-formed macro reference."""
        if self.i >= self.n or self.tok[self.i].id != tokDEFINED:
            return None

        # consume the 'defined' keyword itself before looking at what follows
        self.skip()
        self.skipSpaces()

        use_paren = 0
        if self.peekId() == tokLPAREN:
            self.skip()
            self.skipSpaces()
            use_paren = 1

        if self.peekId() != tokIDENT:
            self.throw( BadExpectedToken, "identifier expected")

        macroname = self.peek().value
        self.skip()
        if use_paren:
            self.skipSpaces()
            if self.peekId() != tokRPAREN:
                self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
            self.skip()

        # DefinedExpr takes (op, macroname); the cursor already points to the
        # token that follows the construct
        return (DefinedExpr(tokDEFINED, macroname), self.i)

    def is_call_or_ident(self):
        # not implemented
        pass

    def parse(self, i):
        # not implemented
        return None
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####           C P P   E X P R E S S I O N S                                   #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
class CppInvalidExpression(Exception):
    """raised when a cpp expression is malformed or uses an unsupported construct"""
 | |
| 
 | |
| class CppExpr:
 | |
|     """a class that models the condition of #if directives into
 | |
|         an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
 | |
|         where "op" is a string describing the operation"""
 | |
| 
 | |
|     unaries  = [ "!", "~" ]
 | |
|     binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
 | |
|     precedences = { "||": 1,
 | |
|                     "&&": 2,
 | |
|                      "|": 3,
 | |
|                      "^": 4,
 | |
|                      "&": 5,
 | |
|                      "==":6, "!=":6,
 | |
|                      "<":7, "<=":7, ">":7, ">=":7,
 | |
|                      "<<":8, ">>":8,
 | |
|                      "+":9, "-":9,
 | |
|                      "*":10, "/":10, "%":10,
 | |
|                      "!":11, "~":12
 | |
|                      }
 | |
| 
 | |
|     def __init__(self, tokens):
 | |
|         """initialize a CppExpr. 'tokens' must be a CppToken list"""
 | |
|         self.tok  = tokens
 | |
|         self.n    = len(tokens)
 | |
|         if debugCppExpr:
 | |
|             print "CppExpr: trying to parse %s" % repr(tokens)
 | |
|         expr      = self.is_expr(0)
 | |
|         if debugCppExpr:
 | |
|             print "CppExpr: got " + repr(expr)
 | |
|         self.expr = expr[0]
 | |
| 
 | |
|     re_cpp_constant = re.compile(r"((\d|\w|_)+)")
 | |
| 
 | |
|     def throw(self,exception,i,msg):
 | |
|         if i < self.n:
 | |
|             tok = self.tok[i]
 | |
|             print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
 | |
|         else:
 | |
|             print "EOF: %s" % msg
 | |
|         raise exception
 | |
| 
 | |
|     def skip_spaces(self,i):
 | |
|         """skip spaces in input token list"""
 | |
|         while i < self.n:
 | |
|             t = self.tok[i]
 | |
|             if t.id != tokSPACE and t.id != tokLN:
 | |
|                 break
 | |
|             i += 1
 | |
|         return i
 | |
| 
 | |
|     def expectId(self,i,id):
 | |
|         """check that a given token id is at the current position, then skip over it"""
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n or self.tok[i].id != id:
 | |
|             self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id))
 | |
|         return i+1
 | |
| 
 | |
|     def expectIdent(self,i):
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n or self.tok[i].id != tokIDENT:
 | |
|             self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id))
 | |
|         return i+1
 | |
| 
 | |
|     # the is_xxxxx function returns either None or a pair (e,nextpos)
 | |
|     # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
 | |
|     # the corresponding next position in the input token list
 | |
|     #
 | |
| 
 | |
|     def is_decimal(self,i):
 | |
|         v = self.tok[i].value[:]
 | |
|         while len(v) > 0 and v[-1] in "ULul":
 | |
|             v = v[:-1]
 | |
|         for digit in v:
 | |
|             if not digit.isdigit():
 | |
|                 return None
 | |
| 
 | |
|         # for an integer expression tuple, the argument
 | |
|         # is simply the value as an integer
 | |
|         val = string.atoi(v)
 | |
|         return ("int", val), i+1
 | |
| 
 | |
|     def is_hexadecimal(self,i):
 | |
|         v = self.tok[i].value[:]
 | |
|         while len(v) > 0 and v[-1] in "ULul":
 | |
|             v = v[:-1]
 | |
|         if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
 | |
|             for digit in v[2:]:
 | |
|                 if not digit in "0123456789abcdefABCDEF":
 | |
|                     return None
 | |
| 
 | |
|             # for an hex expression tuple, the argument
 | |
|             # is the value as an integer
 | |
|             val = int(v[2:], 16)
 | |
|             return ("hex", val), i+1
 | |
| 
 | |
|         return None
 | |
| 
 | |
|     def is_integer(self,i):
 | |
|         if self.tok[i].id != tokNUMBER:
 | |
|             return None
 | |
| 
 | |
|         c = self.is_decimal(i)
 | |
|         if c: return c
 | |
| 
 | |
|         c = self.is_hexadecimal(i)
 | |
|         if c: return c
 | |
| 
 | |
|         return None
 | |
| 
 | |
|     def is_number(self,i):
 | |
|         t = self.tok[i]
 | |
|         if t.id == tokMINUS and i+1 < self.n:
 | |
|             c = self.is_integer(i+1)
 | |
|             if c:
 | |
|                 e, i2 = c
 | |
|                 op, val  = e
 | |
|                 return (op, -val), i2
 | |
|         if t.id == tokPLUS and i+1 < self.n:
 | |
|             c = self.is_integer(i+1)
 | |
|             if c: return c
 | |
| 
 | |
|         return self.is_integer(i)
 | |
| 
 | |
| 
 | |
|     def is_alnum(self,i):
 | |
|         """test wether a given token is alpha-numeric"""
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n:
 | |
|             return None
 | |
|         t = self.tok[i]
 | |
|         m = CppExpr.re_cpp_constant.match(t.id)
 | |
|         if m:
 | |
|             #print "... alnum '%s'" % m.group(1)
 | |
|             r = m.group(1)
 | |
|             return ("ident", r), i+1
 | |
|         return None
 | |
| 
 | |
|     def is_defined(self,i):
 | |
|         t = self.tok[i]
 | |
|         if t.id != tokDEFINED:
 | |
|             return None
 | |
| 
 | |
|         # we have the defined keyword, check the rest
 | |
|         i = self.skip_spaces(i+1)
 | |
|         use_parens = 0
 | |
|         if i < self.n and self.tok[i].id == tokLPAREN:
 | |
|             use_parens = 1
 | |
|             i = self.skip_spaces(i+1)
 | |
| 
 | |
|         if i >= self.n:
 | |
|             self.throw(CppConstantExpected,i,"### 'defined' must be followed  by macro name or left paren")
 | |
| 
 | |
|         t = self.tok[i]
 | |
|         if t.id != tokIDENT:
 | |
|             self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")
 | |
| 
 | |
|         i += 1
 | |
|         if use_parens:
 | |
|             i = self.expectId(i,tokRPAREN)
 | |
| 
 | |
|         return ("defined",t.value), i
 | |
| 
 | |
| 
 | |
|     def is_call_or_ident(self,i):
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n:
 | |
|             return None
 | |
| 
 | |
|         t = self.tok[i]
 | |
|         if t.id != tokIDENT:
 | |
|             return None
 | |
| 
 | |
|         name = t.value
 | |
| 
 | |
|         i = self.skip_spaces(i+1)
 | |
|         if i >= self.n or self.tok[i].id != tokLPAREN:
 | |
|             return ("ident", name), i
 | |
| 
 | |
|         params    = []
 | |
|         depth     = 1
 | |
|         i += 1
 | |
|         j  = i
 | |
|         while i < self.n:
 | |
|             id = self.tok[i].id
 | |
|             if id == tokLPAREN:
 | |
|                 depth += 1
 | |
|             elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
 | |
|                 while j < i and self.tok[j].id == tokSPACE:
 | |
|                     j += 1
 | |
|                 k = i
 | |
|                 while k > j and self.tok[k-1].id == tokSPACE:
 | |
|                     k -= 1
 | |
|                 param = self.tok[j:k]
 | |
|                 params.append( param )
 | |
|                 if id == tokRPAREN:
 | |
|                     break
 | |
|                 j = i+1
 | |
|             elif id == tokRPAREN:
 | |
|                 depth -= 1
 | |
|             i += 1
 | |
| 
 | |
|         if i >= self.n:
 | |
|             return None
 | |
| 
 | |
|         return ("call", (name, params)), i+1
 | |
| 
 | |
|     def is_token(self,i,token):
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n or self.tok[i].id != token:
 | |
|             return None
 | |
|         return token, i+1
 | |
| 
 | |
| 
 | |
|     def is_value(self,i):
 | |
|         t = self.tok[i]
 | |
|         if t.id == tokSTRING:
 | |
|             return ("string", t.value), i+1
 | |
| 
 | |
|         c = self.is_number(i)
 | |
|         if c: return c
 | |
| 
 | |
|         c = self.is_defined(i)
 | |
|         if c: return c
 | |
| 
 | |
|         c = self.is_call_or_ident(i)
 | |
|         if c: return c
 | |
| 
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n or self.tok[i].id != tokLPAREN:
 | |
|             return None
 | |
| 
 | |
|         popcount = 1
 | |
|         i2       = i+1
 | |
|         while i2 < self.n:
 | |
|             t = self.tok[i2]
 | |
|             if t.id == tokLPAREN:
 | |
|                 popcount += 1
 | |
|             elif t.id == tokRPAREN:
 | |
|                 popcount -= 1
 | |
|                 if popcount == 0:
 | |
|                     break
 | |
|             i2 += 1
 | |
| 
 | |
|         if popcount != 0:
 | |
|             self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")
 | |
| 
 | |
|         if debugCppExpr:
 | |
|             print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
 | |
|         oldcount   = self.n
 | |
|         self.n     = i2
 | |
|         c          = self.is_expr(i+1)
 | |
|         self.n     = oldcount
 | |
|         if not c:
 | |
|             self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")
 | |
| 
 | |
|         e, i = c
 | |
|         return e, i2+1
 | |
| 
 | |
|     def is_unary(self,i):
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n:
 | |
|             return None
 | |
| 
 | |
|         t = self.tok[i]
 | |
|         if t.id in CppExpr.unaries:
 | |
|             c = self.is_unary(i+1)
 | |
|             if not c:
 | |
|                 self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
 | |
|             e, i = c
 | |
|             return (t.id, e), i
 | |
| 
 | |
|         return self.is_value(i)
 | |
| 
 | |
|     def is_binary(self,i):
 | |
|         i = self.skip_spaces(i)
 | |
|         if i >= self.n:
 | |
|             return None
 | |
| 
 | |
|         c = self.is_unary(i)
 | |
|         if not c:
 | |
|             return None
 | |
| 
 | |
|         e1, i2 = c
 | |
|         i2 = self.skip_spaces(i2)
 | |
|         if i2 >= self.n:
 | |
|             return c
 | |
| 
 | |
|         t = self.tok[i2]
 | |
|         if t.id in CppExpr.binaries:
 | |
|             c = self.is_binary(i2+1)
 | |
|             if not c:
 | |
|                 self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
 | |
|             e2, i3 = c
 | |
|             return (t.id, e1, e2), i3
 | |
| 
 | |
|         return None
 | |
| 
 | |
|     def is_expr(self,i):
 | |
|         return self.is_binary(i)
 | |
| 
 | |
|     def dump_node(self,e):
 | |
|         op = e[0]
 | |
|         line = "(" + op
 | |
|         if op == "int":
 | |
|             line += " %d)" % e[1]
 | |
|         elif op == "hex":
 | |
|             line += " 0x%x)" % e[1]
 | |
|         elif op == "ident":
 | |
|             line += " %s)" % e[1]
 | |
|         elif op == "defined":
 | |
|             line += " %s)" % e[1]
 | |
|         elif op == "call":
 | |
|             arg = e[1]
 | |
|             line += " %s [" % arg[0]
 | |
|             prefix = ""
 | |
|             for param in arg[1]:
 | |
|                 par = ""
 | |
|                 for tok in param:
 | |
|                     par += str(tok)
 | |
|                 line += "%s%s" % (prefix, par)
 | |
|                 prefix = ","
 | |
|             line += "])"
 | |
|         elif op in CppExpr.unaries:
 | |
|             line += " %s)" % self.dump_node(e[1])
 | |
|         elif op in CppExpr.binaries:
 | |
|             line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
 | |
|         else:
 | |
|             line += " ?%s)" % repr(e[1])
 | |
| 
 | |
|         return line
 | |
| 
 | |
|     def __repr__(self):
 | |
|         return self.dump_node(self.expr)
 | |
| 
 | |
|     def source_node(self,e):
 | |
|         op = e[0]
 | |
|         if op == "int":
 | |
|             return "%d" % e[1]
 | |
|         if op == "hex":
 | |
|             return "0x%x" % e[1]
 | |
|         if op == "ident":
 | |
|             # XXX: should try to expand
 | |
|             return e[1]
 | |
|         if op == "defined":
 | |
|             return "defined(%s)" % e[1]
 | |
| 
 | |
|         prec = CppExpr.precedences.get(op,1000)
 | |
|         arg  = e[1]
 | |
|         if op in CppExpr.unaries:
 | |
|             arg_src = self.source_node(arg)
 | |
|             arg_op  = arg[0]
 | |
|             arg_prec = CppExpr.precedences.get(arg[0],1000)
 | |
|             if arg_prec < prec:
 | |
|                 return "!(" + arg_src + ")"
 | |
|             else:
 | |
|                 return "!" + arg_src
 | |
|         if op in CppExpr.binaries:
 | |
|             arg2     = e[2]
 | |
|             arg1_op  = arg[0]
 | |
|             arg2_op  = arg2[0]
 | |
|             arg1_src = self.source_node(arg)
 | |
|             arg2_src = self.source_node(arg2)
 | |
|             if CppExpr.precedences.get(arg1_op,1000) < prec:
 | |
|                 arg1_src = "(%s)" % arg1_src
 | |
|             if CppExpr.precedences.get(arg2_op,1000) < prec:
 | |
|                 arg2_src = "(%s)" % arg2_src
 | |
| 
 | |
|             return "%s %s %s" % (arg1_src, op, arg2_src)
 | |
|         return "???"
 | |
| 
 | |
|     def __str__(self):
 | |
|         return self.source_node(self.expr)
 | |
| 
 | |
|     def int_node(self,e):
 | |
|         if e[0] == "int":
 | |
|             return e[1]
 | |
|         elif e[1] == "hex":
 | |
|             return int(e[1],16)
 | |
|         else:
 | |
|             return None
 | |
| 
 | |
|     def toInt(self):
 | |
|         return self.int_node(self.expr)
 | |
| 
 | |
|     def optimize_node(self,e,macros={}):
 | |
|         op = e[0]
 | |
|         if op == "defined":
 | |
|             name = e[1]
 | |
|             if macros.has_key(name):
 | |
|                 if macros[name] == kCppUndefinedMacro:
 | |
|                     return ("int", 0)
 | |
|                 else:
 | |
|                     return ("int", 1)
 | |
| 
 | |
|             if kernel_remove_config_macros and name.startswith("CONFIG_"):
 | |
|                 return ("int", 0)
 | |
| 
 | |
|         elif op == "!":
 | |
|             op, v = e
 | |
|             v = self.optimize_node(v, macros)
 | |
|             if v[0] == "int":
 | |
|                 if v[1] == 0:
 | |
|                     return ("int", 1)
 | |
|                 else:
 | |
|                     return ("int", 0)
 | |
| 
 | |
|         elif op == "&&":
 | |
|             op, l, r = e
 | |
|             l  = self.optimize_node(l, macros)
 | |
|             r  = self.optimize_node(r, macros)
 | |
|             li = self.int_node(l)
 | |
|             ri = self.int_node(r)
 | |
|             if li != None:
 | |
|                 if li == 0:
 | |
|                     return ("int", 0)
 | |
|                 else:
 | |
|                     return r
 | |
| 
 | |
|         elif op == "||":
 | |
|             op, l, r = e
 | |
|             l  = self.optimize_node(l, macros)
 | |
|             r  = self.optimize_node(r, macros)
 | |
|             li = self.int_node(l)
 | |
|             ri = self.int_node(r)
 | |
|             if li != None:
 | |
|                 if li == 0:
 | |
|                     return r
 | |
|                 else:
 | |
|                     return ("int", 1)
 | |
|             elif ri != None:
 | |
|                 if ri == 0:
 | |
|                     return l
 | |
|                 else:
 | |
|                     return ("int", 1)
 | |
|         return e
 | |
| 
 | |
|     def optimize(self,macros={}):
 | |
|         self.expr = self.optimize_node(self.expr,macros)
 | |
| 
 | |
|     def removePrefixedNode(self,e,prefix,names):
 | |
|         op = e[0]
 | |
|         if op == "defined":
 | |
|             name = e[1]
 | |
|             if name.startswith(prefix):
 | |
|                 if names.has_key[name] and names[name] == "y":
 | |
|                     return ("int", 1)
 | |
|                 else:
 | |
|                     return ("int", 0)
 | |
| 
 | |
|         elif op in CppExpr.unaries:
 | |
|             op, v = e
 | |
|             v = self.removePrefixedNode(v,prefix,names)
 | |
|             return (op, v)
 | |
|         elif op in CppExpr.binaries:
 | |
|             op, v1, v2 = e
 | |
|             v1 = self.removePrefixedNode(v1,prefix,names)
 | |
|             v2 = self.removePrefixedNode(v2,prefix,names)
 | |
|             return (op, v1, v2)
 | |
|         elif op == "call":
 | |
|             func, params = e[1]
 | |
|             params2 = []
 | |
|             for param in params:
 | |
|                 params2.append( self.removePrefixedNode(param,prefix,names) )
 | |
|             return (op, (func, params2))
 | |
| 
 | |
|         return e
 | |
| 
 | |
|     def removePrefixed(self,prefix,names={}):
 | |
|         self.expr = self.removePrefixedNode(self.expr,prefix,names)
 | |
| 
 | |
|     def is_equal_node(self,e1,e2):
 | |
|         if e1[0] != e2[0] or len(e1) != len(e2):
 | |
|             return False
 | |
| 
 | |
|         op = e1[0]
 | |
|         if op == "int" or op == "hex" or op == "!" or op == "defined":
 | |
|             return e1[0] == e2[0]
 | |
| 
 | |
|         return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2])
 | |
| 
 | |
|     def is_equal(self,other):
 | |
|         return self.is_equal_node(self.expr,other.expr)
 | |
| 
 | |
def test_cpp_expr(expr, expected):
    """parse 'expr' and print a KO line when its repr() differs from 'expected'"""
    tokens = CppLineTokenizer( expr ).toTokenList()
    actual = repr(CppExpr( tokens ))
    if actual == expected:
        return
    print("KO: expression '%s' generates '%s', should be '%s'" % (expr, actual, expected))
 | |
| 
 | |
def test_cpp_expr_optim(expr, expected, macros={}):
    """parse 'expr', optimize it with 'macros', and print a KO line when
       the repr() of the result differs from 'expected'"""
    tokens = CppLineTokenizer( expr ).toTokenList()
    parsed = CppExpr( tokens )
    parsed.optimize(macros)

    actual = repr(parsed)
    if actual == expected:
        return
    print("KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, actual, expected))
 | |
| 
 | |
def test_cpp_expr_source(expr, expected):
    """parse 'expr' and print a KO line when its str() differs from 'expected'"""
    tokens = CppLineTokenizer( expr ).toTokenList()
    actual = str(CppExpr( tokens ))
    if actual == expected:
        return
    print("KO: source expression '%s' generates '%s', should be '%s'" % (expr, actual, expected))
 | |
| 
 | |
def test_CppExpr():
    """run the CppExpr self-tests: parsing, optimization, then source generation

       every failing case prints a 'KO' line; nothing is printed on success"""
    print("testing CppExpr")

    # (expression, expected repr)
    parse_cases = [
        ( "0", "(int 0)" ),
        ( "1", "(int 1)" ),
        ( "1 && 1", "(&& (int 1) (int 1))" ),
        ( "1 && 0", "(&& (int 1) (int 0))" ),
        ( "EXAMPLE", "(ident EXAMPLE)" ),
        ( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" ),
        ( "defined(EXAMPLE)", "(defined EXAMPLE)" ),
        ( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" ),
        ( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" ),
        ( "FOO(BAR)", "(call FOO [BAR])" ),
    ]
    for case in parse_cases:
        test_cpp_expr( *case )

    # (expression, expected repr after optimization[, known macros])
    optim_cases = [
        ( "0", "(int 0)" ),
        ( "1", "(int 1)" ),
        ( "1 && 1", "(int 1)" ),
        ( "1 && 0", "(int 0)" ),
        ( "0 && 1", "(int 0)" ),
        ( "0 && 0", "(int 0)" ),
        ( "1 || 1", "(int 1)" ),
        ( "1 || 0", "(int 1)" ),
        ( "0 || 1", "(int 1)" ),
        ( "0 || 0", "(int 0)" ),
        ( "EXAMPLE", "(ident EXAMPLE)" ),
        ( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" ),
        ( "defined(EXAMPLE)", "(defined EXAMPLE)" ),
        ( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } ),
        ( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} ),
        ( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" ),
        ( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } ),
        ( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } ),
        ( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" ),
        ( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } ),
        ( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } ),
        ( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } ),
        ( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } ),
    ]
    for case in optim_cases:
        test_cpp_expr_optim( *case )

    # (expression, expected regenerated source)
    source_cases = [
        ( "0", "0" ),
        ( "1", "1" ),
        ( "1 && 1", "1 && 1" ),
        ( "1 && 0", "1 && 0" ),
        ( "0 && 1", "0 && 1" ),
        ( "0 && 0", "0 && 0" ),
        ( "1 || 1", "1 || 1" ),
        ( "1 || 0", "1 || 0" ),
        ( "0 || 1", "0 || 1" ),
        ( "0 || 0", "0 || 0" ),
        ( "EXAMPLE", "EXAMPLE" ),
        ( "EXAMPLE - 3", "EXAMPLE - 3" ),
        ( "defined(EXAMPLE)", "defined(EXAMPLE)" ),
        ( "defined EXAMPLE", "defined(EXAMPLE)" ),
    ]
    for case in source_cases:
        test_cpp_expr_source( *case )
 | |
| 
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####          C P P   B L O C K                                                #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
| class Block:
 | |
|     """a class used to model a block of input source text. there are two block types:
 | |
|         - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
 | |
|         - text blocks, contain the tokens of non-directive blocks
 | |
| 
 | |
|        the cpp parser class below will transform an input source file into a list of Block
 | |
|        objects (grouped in a BlockList object for convenience)"""
 | |
| 
 | |
|     def __init__(self,tokens,directive=None,lineno=0):
 | |
|         """initialize a new block, if 'directive' is None, this is a text block
 | |
|            NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
 | |
|                  and '#ifndef MACRO' into '#if !defined(MACRO)'"""
 | |
|         if directive == "ifdef":
 | |
|             tok = Token()
 | |
|             tok.set(tokDEFINED)
 | |
|             tokens = [ tok ] + tokens
 | |
|             directive = "if"
 | |
| 
 | |
|         elif directive == "ifndef":
 | |
|             tok1 = Token()
 | |
|             tok2 = Token()
 | |
|             tok1.set(tokNOT)
 | |
|             tok2.set(tokDEFINED)
 | |
|             tokens = [ tok1, tok2 ] + tokens
 | |
|             directive = "if"
 | |
| 
 | |
|         self.tokens    = tokens
 | |
|         self.directive = directive
 | |
|         if lineno > 0:
 | |
|             self.lineno = lineno
 | |
|         else:
 | |
|             self.lineno = self.tokens[0].lineno
 | |
| 
 | |
|         if self.isIf():
 | |
|             self.expr = CppExpr( self.tokens )
 | |
| 
 | |
|     def isDirective(self):
 | |
|         """returns True iff this is a directive block"""
 | |
|         return self.directive != None
 | |
| 
 | |
|     def isConditional(self):
 | |
|         """returns True iff this is a conditional directive block"""
 | |
|         return self.directive in ["if","ifdef","ifndef","else","elif","endif"]
 | |
| 
 | |
|     def isDefine(self):
 | |
|         """returns the macro name in a #define directive, or None otherwise"""
 | |
|         if self.directive != "define":
 | |
|             return None
 | |
| 
 | |
|         return self.tokens[0].value
 | |
| 
 | |
|     def isIf(self):
 | |
|         """returns True iff this is an #if-like directive block"""
 | |
|         return self.directive in ["if","ifdef","ifndef","elif"]
 | |
| 
 | |
|     def isInclude(self):
 | |
|         """checks wether this is a #include directive. if true, then returns the
 | |
|            corresponding file name (with brackets or double-qoutes). None otherwise"""
 | |
|         if self.directive != "include":
 | |
|             return None
 | |
| 
 | |
|         #print "iii " + repr(self.tokens)
 | |
|         if self.tokens[0].id == tokSTRING:
 | |
|             # a double-quote include, that's easy
 | |
|             return self.tokens[0].value
 | |
| 
 | |
|         # we only want the bracket part, not any comments or junk after it
 | |
|         if self.tokens[0].id == "<":
 | |
|             i   = 0
 | |
|             tok = self.tokens
 | |
|             n   = len(tok)
 | |
|             while i < n and tok[i].id != ">":
 | |
|                 i += 1
 | |
| 
 | |
|             if i >= n:
 | |
|                 return None
 | |
| 
 | |
|             return string.join([ str(x) for x in tok[:i+1] ],"")
 | |
| 
 | |
|         else:
 | |
|             return None
 | |
| 
 | |
|     def removeWhiteSpace(self):
 | |
|         # Remove trailing whitespace and empty lines
 | |
|         # All whitespace is also contracted to a single space
 | |
|         if self.directive != None:
 | |
|             return
 | |
| 
 | |
|         tokens = []
 | |
|         line   = 0     # index of line start
 | |
|         space  = -1    # index of first space, or -1
 | |
|         ii = 0
 | |
|         nn = len(self.tokens)
 | |
|         while ii < nn:
 | |
|             tok = self.tokens[ii]
 | |
| 
 | |
|             # If we find a space, record its position if this is the first
 | |
|             # one the line start or the previous character. Don't append
 | |
|             # anything to tokens array yet though.
 | |
|             if tok.id == tokSPACE:
 | |
|                 if space < 0:
 | |
|                     space = ii
 | |
|                 ii += 1
 | |
|                 continue
 | |
| 
 | |
|             # If this is a line space, ignore the spaces we found previously
 | |
|             # on the line, and remove empty lines.
 | |
|             if tok.id == tokLN:
 | |
|                 old_line  = line
 | |
|                 old_space = space
 | |
|                 #print "N line=%d space=%d ii=%d" % (line, space, ii)
 | |
|                 ii   += 1
 | |
|                 line  = ii
 | |
|                 space = -1
 | |
|                 if old_space == old_line:  # line only contains spaces
 | |
|                     #print "-s"
 | |
|                     continue
 | |
|                 if ii-1 == old_line:  # line is empty
 | |
|                     #print "-e"
 | |
|                     continue
 | |
|                 tokens.append(tok)
 | |
|                 continue
 | |
| 
 | |
|             # Other token, append any space range if any, converting each
 | |
|             # one to a single space character, then append the token.
 | |
|             if space >= 0:
 | |
|                 jj = space
 | |
|                 space = -1
 | |
|                 while jj < ii:
 | |
|                     tok2 = self.tokens[jj]
 | |
|                     tok2.value = " "
 | |
|                     tokens.append(tok2)
 | |
|                     jj += 1
 | |
| 
 | |
|             tokens.append(tok)
 | |
|             ii += 1
 | |
| 
 | |
|         self.tokens = tokens
 | |
| 
 | |
|     def writeWithWarning(self,out,warning,left_count,repeat_count):
 | |
|         # removeWhiteSpace() will sometimes creates non-directive blocks
 | |
|         # without any tokens. These come from blocks that only contained
 | |
|         # empty lines and spaces. They should not be printed in the final
 | |
|         # output, and then should not be counted for this operation.
 | |
|         #
 | |
|         if not self.directive and self.tokens == []:
 | |
|             return left_count
 | |
| 
 | |
|         if self.directive:
 | |
|             out.write(str(self).rstrip() + "\n")
 | |
|             left_count -= 1
 | |
|             if left_count == 0:
 | |
|                 out.write(warning)
 | |
|                 left_count = repeat_count
 | |
| 
 | |
|         else:
 | |
|             for tok in self.tokens:
 | |
|                 out.write(str(tok))
 | |
|                 if tok.id == tokLN:
 | |
|                     left_count -= 1
 | |
|                     if left_count == 0:
 | |
|                         out.write(warning)
 | |
|                         left_count = repeat_count
 | |
| 
 | |
|         return left_count
 | |
| 
 | |
| 
 | |
|     def __repr__(self):
 | |
|         """generate the representation of a given block"""
 | |
|         if self.directive:
 | |
|             result = "#%s " % self.directive
 | |
|             if self.isIf():
 | |
|                 result += repr(self.expr)
 | |
|             else:
 | |
|                 for tok in self.tokens:
 | |
|                     result += repr(tok)
 | |
|         else:
 | |
|             result = ""
 | |
|             for tok in self.tokens:
 | |
|                 result += repr(tok)
 | |
| 
 | |
|         return result
 | |
| 
 | |
|     def __str__(self):
 | |
|         """generate the string representation of a given block"""
 | |
|         if self.directive:
 | |
|             if self.directive == "if":
 | |
|                 # small optimization to re-generate #ifdef and #ifndef
 | |
|                 e = self.expr.expr
 | |
|                 op = e[0]
 | |
|                 if op == "defined":
 | |
|                     result = "#ifdef %s" % e[1]
 | |
|                 elif op == "!" and e[1][0] == "defined":
 | |
|                     result = "#ifndef %s" % e[1][1]
 | |
|                 else:
 | |
|                     result = "#if " + str(self.expr)
 | |
|             else:
 | |
|                 result = "#%s" % self.directive
 | |
|                 if len(self.tokens):
 | |
|                     result += " "
 | |
|                 for tok in self.tokens:
 | |
|                     result += str(tok)
 | |
|         else:
 | |
|             result = ""
 | |
|             for tok in self.tokens:
 | |
|                 result += str(tok)
 | |
| 
 | |
|         return result
 | |
| 
 | |
class BlockList:
    """a convenience class used to hold and process a list of blocks returned by
       the cpp parser"""
    def __init__(self,blocks):
        """wrap the given list of blocks (the list is stored, not copied)"""
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self,n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        """return the source text of all blocks; each directive is stripped
           of trailing whitespace and terminated by a newline"""
        parts = []
        for b in self.blocks:
            text = str(b)
            if b.isDirective():
                text = text.rstrip() + '\n'
            parts.append(text)
        # join once instead of growing a string block after block
        return "".join(parts)

    def  optimizeIf01(self):
        """remove the code between #if 0 .. #endif in a BlockList"""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """remove known defined and undefined macros from a BlockList"""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self,macros):
        """remove known macro definitions from a BlockList"""
        self.blocks = remove_macro_defines(self.blocks,macros)

    def removePrefixed(self,prefix,names):
        """forward removePrefixed(prefix,names) to the expression of every
           #if-like directive of the list"""
        for b in self.blocks:
            if b.isIf():
                b.expr.removePrefixed(prefix,names)

    def removeWhiteSpace(self):
        """call removeWhiteSpace() on every block of the list"""
        for b in self.blocks:
            b.removeWhiteSpace()

    def optimizeAll(self,macros):
        """apply optimizeMacros(macros) then optimizeIf01()"""
        self.optimizeMacros(macros)
        self.optimizeIf01()

    def findIncludes(self):
        """return the list of included files in a BlockList"""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)

        return result

    def write(self,out):
        """write the text of the whole list to 'out'"""
        out.write(str(self))

    def writeWithWarning(self,out,warning,repeat_count):
        """write the list to 'out', letting each block insert 'warning'
           every 'repeat_count' lines; the line counter is threaded from
           one block to the next"""
        left_count = repeat_count
        for b in self.blocks:
            left_count = b.writeWithWarning(out,warning,left_count,repeat_count)

    def removeComments(self):
        """reset the value of every tokSPACE token to a single space.
           NOTE(review): presumably comments are tokenized as tokSPACE
           tokens, which is what makes this remove them -- confirm against
           the tokenizer"""
        for b in self.blocks:
            for tok in b.tokens:
                if tok.id == tokSPACE:
                    tok.value = " "

    def removeVarsAndFuncs(self,knownStatics=None):
        """remove all extern and static declarations corresponding
           to variable and function declarations. we only accept typedefs
           and enum/structs/union declarations.

           however, we keep the definitions corresponding to the set
           of known static inline functions in the set 'knownStatics',
           which is useful for optimized byteorder swap functions and
           stuff like that.

           'knownStatics' defaults to an empty set.
           """
        if knownStatics is None:
            knownStatics = set()

        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"
        #
        # state, depth and skipTokens deliberately survive from one text
        # block to the next: a declaration may be split by a directive.
        state      = 0
        depth      = 0
        blocks2    = []
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                blocks2.append(b)
            else:
                n     = len(b.tokens)
                i     = 0
                # 'first' is the index of the first token that is still to
                # be kept; n means "nothing to keep so far"
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                # resume keeping tokens right after it
                                skipTokens = False
                                first = i+1

                        i = i+1
                        continue

                    # We are looking for the start of a new type/func/var
                    # ignore whitespace
                    if tokid in [tokLN, tokSPACE]:
                        i = i+1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
                        state = 1
                        i     = i+1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i+1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';': # a variable declaration
                            state = 2
                            break
                        if tokid == tokIDENT:
                            ident = b.tokens[j].value
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex. The tokens from index i to the end of
                        # this block are dropped.
                        #
                        break

                    if ident not in knownStatics:
                        # We're going to skip the tokens for this
                        # declaration: flush what was kept before it.
                        if i > first:
                            blocks2.append( Block(b.tokens[first:i]))
                        skipTokens = True
                        first      = n

                    i = i+1

                if i > first:
                    blocks2.append( Block(b.tokens[first:i]) )

        self.blocks = blocks2

    def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
        """insert your standard issue disclaimer that this is an
           auto-generated file, etc.."""
        tokens = CppLineTokenizer( disclaimer ).toTokenList()
        tokens = tokens[:-1]  # remove trailing tokLN
        self.blocks = [ Block(tokens) ] + self.blocks

    def replaceTokens(self,replacements=None):
        """replace the value of identifier tokens found in text blocks
           according to the given dict (old name -> new name). directive
           blocks are not modified. 'replacements' defaults to no
           replacement at all.
           """
        if not replacements:
            return

        for b in self.blocks:
            if not b.isDirective():
                for tok in b.tokens:
                    if tok.id == tokIDENT:
                        if tok.value in replacements:
                            tok.value = replacements[tok.value]
 | |
| 
 | |
class BlockParser:
    """a class used to convert an input source file into a BlockList object"""

    def __init__(self,tokzer=None):
        """initialize a block parser. the input source is provided through a Tokenizer
           object"""
        self.reset(tokzer)

    def reset(self,tokzer):
        """set the default tokenizer used by getBlocks()"""
        self.state  = 1
        self.tokzer = tokzer

    def getBlocks(self,tokzer=None):
        """tokenize and parse the input source, return a BlockList object.
           tokens are read from 'tokzer', or from the tokenizer given to
           the constructor / reset() when 'tokzer' is None.
           NOTE: empty and line-numbering directives are ignored and removed
                 from the result. as a consequence, it is possible to have
                 two successive text blocks in the result"""
        # state 0 => in source code
        # state 1 => in source code, after a LN
        # state 2 => in source code, after LN then some space
        state   = 1
        lastLN  = 0     # length of 'current' up to and including its last LN
        current = []    # tokens of the text block being accumulated
        blocks  = []

        if tokzer is None:
            tokzer = self.tokzer

        while 1:
            tok = tokzer.getToken()
            if tok.id == tokEOF:
                break

            if tok.id == tokLN:
                state    = 1
                current.append(tok)
                lastLN   = len(current)

            elif tok.id == tokSPACE:
                if state == 1:
                    state = 2
                current.append(tok)

            elif tok.id == "#":
                # NOTE(review): a '#' that is not at the start of a line
                # (state == 0) is silently dropped, it is neither parsed as
                # a directive nor added to the current text -- confirm this
                # is intended.
                if state > 0:
                    # this is the start of a directive

                    if lastLN > 0:
                        # record previous tokens as text block; the spaces
                        # that precede the '#' on its line are discarded
                        block   = Block(current[:lastLN])
                        blocks.append(block)
                        lastLN  = 0

                    current = []

                    # skip spaces after the #
                    while 1:
                        tok = tokzer.getToken()
                        if tok.id != tokSPACE:
                            break

                    if tok.id != tokIDENT:
                        # empty or line-numbering, ignore it: consume the
                        # rest of the line, if any
                        if tok.id != tokLN and tok.id != tokEOF:
                            while 1:
                                tok = tokzer.getToken()
                                if tok.id == tokLN or tok.id == tokEOF:
                                    break
                        continue

                    directive = tok.value
                    lineno    = tok.lineno

                    # skip spaces
                    tok = tokzer.getToken()
                    while tok.id == tokSPACE:
                        tok = tokzer.getToken()

                    # then record tokens until LN
                    dirtokens = []
                    while tok.id != tokLN and tok.id != tokEOF:
                        dirtokens.append(tok)
                        tok = tokzer.getToken()

                    block = Block(dirtokens,directive,lineno)
                    blocks.append(block)
                    state   = 1

            else:
                state = 0
                current.append(tok)

        # whatever text is left after the last directive
        if len(current) > 0:
            block = Block(current)
            blocks.append(block)

        return BlockList(blocks)

    def parse(self,tokzer):
        """parse the tokens provided by 'tokzer' into a BlockList object"""
        return self.getBlocks( tokzer )

    def parseLines(self,lines):
        """parse a list of text lines into a BlockList object"""
        return self.getBlocks( CppLinesTokenizer(lines) )

    def parseFile(self,path):
        """parse a file into a BlockList object"""
        source = open(path, "rt")
        try:
            # close the file even when tokenizing or parsing raises
            return self.getBlocks( CppFileTokenizer(source) )
        finally:
            source.close()
 | |
| 
 | |
| 
 | |
def test_block_parsing(lines,expected):
    """parse the given list of text lines and check that the resulting
       blocks, converted with str(), match the 'expected' list of strings.
       raises BadExpectedToken on the first difference"""
    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
    if len(blocks) != len(expected):
        # raise E(msg) is accepted by both Python 2 and Python 3, unlike
        # the 'raise E, msg' statement form
        raise BadExpectedToken("parser.buildBlocks returned '%s' expecting '%s'" \
              % (str(blocks), repr(expected)))
    for n in range(len(blocks)):
        if str(blocks[n]) != expected[n]:
            raise BadExpectedToken("parser.buildBlocks()[%d] is '%s', expecting '%s'" \
                  % (n, str(blocks[n]), expected[n]))
 | |
|     #for block in blocks:
 | |
|     #    print block
 | |
| 
 | |
def test_BlockParser():
    """run the block parser over a few small inputs and compare the text
       of the resulting blocks with the expected one"""
    # each entry is ( input lines, expected str() of each block )
    cases = [
        ( ["#error hello"], ["#error hello"] ),
        ( [ "foo", "", "bar" ], [ "foo\n\nbar\n" ] ),
        ( [ "foo", "  #  ", "bar" ], [ "foo\n","bar\n" ] ),
        ( [ "foo", "   #  ", "  #  /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
          [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] ),
    ]
    for lines, expected in cases:
        test_block_parsing(lines, expected)
 | |
| 
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####        B L O C K   L I S T   O P T I M I Z A T I O N                      #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
def  remove_macro_defines( blocks, excludedMacros=None ):
    """return a new list of blocks without the macro definitions, i.e.
       '#define <macroName> ....', whose name is in 'excludedMacros'.

       a block is dropped only when its isDefine() method returns a name
       found in 'excludedMacros'; every other block is kept, in order.
       'excludedMacros' defaults to nothing to remove."""
    if excludedMacros is None:
        excludedMacros = ()

    result = []
    for b in blocks:
        macroName = b.isDefine()
        if macroName is None or macroName not in excludedMacros:
            result.append(b)

    return result
 | |
| 
 | |
def  find_matching_endif( blocks, i ):
    """scan 'blocks' from index 'i', which is expected to be just after a
       conditional directive, and return the index of the directive that
       ends that branch: the first #else or #elif found at the same
       nesting level, or the matching #endif.

       when no such directive exists, the returned index is not a valid
       index into 'blocks' (it is len(blocks), or 'i' itself if larger)"""
    total   = len(blocks)
    nesting = 1
    for pos in range(i, total):
        blk = blocks[pos]
        if not blk.isDirective():
            continue

        name = blk.directive
        if name in ( "if", "ifndef", "ifdef" ):
            # a nested conditional: its own #else/#elif/#endif are not ours
            nesting += 1
        elif nesting == 1 and name in ( "else", "elif" ):
            return pos
        elif name == "endif":
            nesting -= 1
            if nesting == 0:
                return pos

    return max(i, total)
 | |
| 
 | |
def  optimize_if01( blocks ):
    """remove the code between #if 0 .. #endif in a list of CppBlocks, and
       the directives around code guarded by a condition known to be true.

       only conditions whose expression has a known integer value (toInt()
       does not return None) are touched. returns a new list; some of the
       blocks of the input list may be modified in place (a #else or #elif
       can be rewritten into another directive).

       once a branch of a conditional is known to be taken, all the
       following #elif/#else branches are dropped. a constant #elif that
       belongs to a conditional whose opening directive is kept is either
       removed (false) or turned into the final #else (true), and the
       closing #endif is kept, so the output stays balanced."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # copy everything up to the next #if-like directive verbatim
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
            result += blocks[i:j]
        if j >= n:
            break

        r = blocks[j].expr.toInt()
        if r is None:
            # not a constant condition: keep the directive as it is
            result.append(blocks[j])
            i = j + 1
            continue

        # An #elif can only be reached here when the directive that opens
        # its conditional was kept in the result (a constant '#if' either
        # consumes or rewrites the branches that follow it). In that case
        # the conditional must remain well-formed in the output.
        in_chain = (blocks[j].directive == "elif")

        if r == 0:
            # if 0 => skip everything until the end of this branch
            k = find_matching_endif( blocks, j+1 )
            if k >= n:
                # unterminated #if 0, finish here
                break
            dir = blocks[k].directive
            if in_chain:
                # drop the dead branch only; the #elif/#else/#endif that
                # follows still belongs to the conditional kept in result
                D2("remove 'elif 0' branch (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
                i = k
            elif dir == "endif":
                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
                i = k + 1
            elif dir == "else":
                # convert 'else' into 'if 1'
                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[j].lineno, blocks[k-1].lineno))
                blocks[k].directive = "if"
                blocks[k].expr      = CppExpr( CppLineTokenizer("1").toTokenList() )
                i = k
            elif dir == "elif":
                # convert 'elif' into 'if'
                D2("convert 'if 0' .. 'elif' into 'if'")
                blocks[k].directive = "if"
                i = k
            continue

        # if 1 => keep the body of this branch, drop all the following ones
        k = find_matching_endif( blocks, j+1 )
        if k >= n:
            # unterminated #if 1, finish here
            D2("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        D2("convert 'if 1' .. '%s' (lines %d to %d)" % (blocks[k].directive, blocks[j].lineno, blocks[k].lineno))
        if in_chain:
            # a true #elif is the last branch that can be taken: it becomes
            # the #else of the conditional kept in result
            blocks[j].directive = "else"
            blocks[j].tokens    = []
            result.append(blocks[j])
        result += optimize_if01(blocks[j+1:k])

        # none of the remaining #elif/#else branches can be taken: move to
        # the #endif closing this conditional
        while k < n and blocks[k].directive != "endif":
            k = find_matching_endif( blocks, k+1 )

        if in_chain:
            i = k        # the #endif closes the kept conditional: copy it
        else:
            i = k + 1    # the #endif goes away with its '#if 1'
    return result
 | |
| 
 | |
def  test_optimizeAll():
    """check BlockList.optimizeAll() on a small input mixing '#if 0' and
       '#if 1' blocks, with and without #else, and nested.
       raises BadExpectedToken when the output is not the expected one, so
       that a failure is not reported as OK by runUnitTests()"""
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    expected = """\
#define GOOD_1

#define GOOD_2

#define GOOD_3

"""

    print("running test_BlockList.optimizeAll")
    out = StringOutput()
    lines = text.split('\n')
    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
    blocks.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
    blocks.write(out)
    if out.get() != expected:
        raise BadExpectedToken("macro optimization failed\n<<<< expecting '%s'\n>>>> result '%s'\n----" \
              % (expected, out.get()))
 | |
| 
 | |
| 
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| #####                                                                           #####
 | |
| #####                                                                           #####
 | |
| #####                                                                           #####
 | |
| #####################################################################################
 | |
| #####################################################################################
 | |
| 
 | |
def runUnitTests():
    """run every unit test of this module, in order, then print OK"""
    print("running unit tests")
    suite = ( test_CppTokenizer, test_CppExpr, test_optimizeAll, test_BlockParser )
    for run_test in suite:
        run_test()
    print("OK")
 | |
| 
 | |
# When this file is executed as a script, rather than imported, run its
# unit tests.
if __name__ == "__main__":
    runUnitTests()
 |