opencv/modules/java/rst_parser.py
2011-07-05 17:29:54 +00:00

373 lines
13 KiB
Python

import os, sys, re, string, glob
from string import Template
class DeclarationParser(object):
    """Collects the text of one ``.. ocv:*function::`` declaration.

    A declaration may span several lines; it is considered complete once
    every opening parenthesis has a matching closing one.

    Attributes:
        fdecl:   declaration text gathered so far (directive prefix removed).
        lang:    language label derived from the directive ("" when empty).
        balance: count of "(" minus count of ")" in ``fdecl``.
    """

    # (directive prefix, language label) pairs recognised by getLang.
    _DIRECTIVES = (
        (".. ocv:function::", "C++"),
        (".. ocv:cfunction::", "C"),
        (".. ocv:pyfunction::", "Python2"),
        (".. ocv:pyoldfunction::", "Python1"),
        (".. ocv:jfunction::", "Java"),
    )

    def __init__(self, line=None):
        """Start from a directive line, or create an empty (ready) parser."""
        if line is not None:
            self.lang = self.getLang(line)
            assert self.lang is not None
            # everything after the first "::" is the declaration itself
            body_start = line.find("::") + 2
            self.fdecl = line[body_start:].strip()
            self.balance = self._paren_balance()
            assert self.balance >= 0
        else:
            self.fdecl = ""
            self.lang = ""
            self.balance = 0

    def _paren_balance(self):
        """Return the number of still-unclosed parentheses in ``fdecl``."""
        text = self.fdecl
        return text.count("(") - text.count(")")

    def append(self, line):
        """Add a continuation line and refresh the parenthesis balance."""
        self.fdecl = self.fdecl + line
        self.balance = self._paren_balance()

    def isready(self):
        """Tell whether all parentheses are closed."""
        return 0 == self.balance

    def getLang(self, line):
        """Return the language label for a directive line, or None."""
        for directive, label in self._DIRECTIVES:
            if line.startswith(directive):
                return label
        return None

    def hasDeclaration(self, line):
        """Tell whether ``line`` starts with a known function directive."""
        return not (self.getLang(line) is None)
class ParamParser(object):
    """Collects one ``:param name: comment`` entry and its continuation lines.

    Attributes:
        prefix:  the run of spaces preceding ``:param`` on the first line.
        name:    parameter name (text between ``:param`` and the next ":").
        comment: description text; continuation lines are joined with "\\n".
        active:  True while further lines may still belong to this entry.
    """

    def __init__(self, line=None):
        """Parse the first line of a parameter entry, or create an idle parser."""
        if line is None:
            self.prefix = ""
            self.name = ""
            self.comment = ""
            self.active = False
            return

        marker_pos = line.find(":param")
        # the marker must be present and must be indented
        assert marker_pos > 0
        self.prefix = line[:marker_pos]
        # indentation has to consist of spaces only
        assert self.prefix == " " * len(self.prefix)

        remainder = line[marker_pos + len(":param"):].lstrip()
        colon_pos = remainder.find(":")
        assert colon_pos > 0
        self.name = remainder[:colon_pos]
        self.comment = remainder[colon_pos + 1:].lstrip()
        self.active = True

    def append(self, line):
        """Feed the next raw line; deactivates when the entry has ended."""
        assert self.active
        starts_new_param = self.hasDeclaration(line)
        is_continuation = (not line) or line.startswith(self.prefix)
        if starts_new_param or not is_continuation:
            self.active = False
            return
        self.comment = self.comment + "\n" + line.lstrip()

    def hasDeclaration(self, line):
        """Tell whether ``line`` (ignoring indentation) opens a ``:param``."""
        stripped = line.lstrip()
        return stripped.startswith(":param")
class RstParser(object):
def __init__(self, cpp_parser):
self.cpp_parser = cpp_parser
self.definitions = {}
def parse(self, module_path):
doclist = glob.glob(os.path.join(module_path,"doc/*.rst"))
for doc in doclist:
self.parse_rst_file(doc)
def parse_section(self, section_name, file_name, lineno, lines):
func = {}
func["name"] = section_name
func["file"] = file_name
func["line"] = lineno
# parse section name
class_separator_idx = func["name"].find("::")
if class_separator_idx > 0:
func["class"] = func["name"][:class_separator_idx]
func["method"] = func["name"][class_separator_idx+2:]
else:
func["method"] = func["name"]
skip_code_lines = False
expected_brief = True
fdecl = DeclarationParser()
pdecl = ParamParser()
for l in lines:
# read tail of function/method declaration if needed
if not fdecl.isready():
fdecl.append(ll)
if fdecl.isready():
self.add_new_fdecl(func, fdecl)
continue
# skip lines if line-skipping mode is activated
if skip_code_lines:
if not l or l.startswith(" ") or l.startswith("\t"):
continue
else:
skip_code_lines = False
ll = l.strip()
if ll == "..": #strange construction...
continue
# turn on line-skipping mode for code fragments
if ll.endswith("::"):
skip_code_lines = True
ll = ll[:len(ll)-3]
if ll.startswith(".. code-block::"):
skip_code_lines = True
continue
# continue param parsing
if pdecl.active:
pdecl.append(l)
if pdecl.active:
continue
else:
self.add_new_pdecl(func, pdecl)
#do not continue - current line can contain next parameter definition
# todo: parse structure members; skip them for now
if ll.startswith(".. ocv:member::"):
skip_code_lines = True
continue
# parse class & struct definitions
if ll.startswith(".. ocv:class::"):
func["class"] = ll[ll.find("::")+2:].strip()
if "method" in func:
del func["method"]
func["isclass"] = True
expected_brief = True
continue
if ll.startswith(".. ocv:struct::"):
func["class"] = ll[ll.find("::")+2:].strip()
if "method" in func:
del func["method"]
func["isstruct"] = True
expected_brief = True
continue
# parse function/method definitions
if fdecl.hasDeclaration(ll):
fdecl = DeclarationParser(ll)
if fdecl.isready():
self.add_new_fdecl(func, fdecl)
expected_brief = False
continue
# parse parameters
if pdecl.hasDeclaration(l):
pdecl = ParamParser(l)
expected_brief = False
continue
# record brief description
if expected_brief and len(ll) == 0:
if "brief" in func:
expected_brief = False
continue
if expected_brief:
func["brief"] = func.get("brief", "") + "\n" + ll
if skip_code_lines:
expected_brief = False #force end brief if code block begins
continue
# record other lines as long description
func["long"] = func.get("long", "") + "\n" + ll
# endfor l in lines
# save last parameter if needed
if pdecl.active:
self.add_new_pdecl(func, pdecl)
# add definition to list
func = self.normalize(func)
if self.validate(func):
self.definitions[func["name"]] = func
#self.print_info(func)
elif func:
self.print_info(func, True)
def parse_rst_file(self, doc):
doc = os.path.abspath(doc)
lineno = 0
lines = []
flineno = 0
fname = ""
prev_line = None
df = open(doc, "rt")
for l in df.readlines():
lineno += 1
if prev_line == None:
prev_line = l.rstrip()
continue
ll = l.rstrip()
if len(prev_line) > 0 and len(ll) >= len(prev_line) and ll == "-" * len(ll):
#new function candidate
if len(lines) > 1:
self.parse_section(fname, doc, flineno, lines[:len(lines)-1])
lines = []
flineno = lineno-1
fname = prev_line.strip()
elif flineno > 0:
lines.append(ll)
prev_line = ll
df.close()
#don't forget about the last function section in file!!!
if len(lines) > 1:
self.parse_section(fname, doc, flineno, lines[:len(lines)])
def add_new_fdecl(self, func, decl):
decls = func.get("decls",[])
if (decl.lang == "C++" or decl.lang == "C"):
rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
decls.append( (decl.lang, decl.fdecl, rst_decl) )
else:
decls.append( (decl.lang, decl.fdecl) )
func["decls"] = decls
def add_new_pdecl(self, func, decl):
params = func.get("params",{})
if decl.name in params:
print "Parser error: parameter \"%s\" for %s is defined multiple times. See %s line %s" \
% (decl.name, func["name"], func["file"], func["line"])
else:
params[decl.name] = decl.comment
func["params"] = params
def print_info(self, func, skipped=False):
print ""
if skipped:
print "SKIPPED DEFINITION:"
print "name: %s" % (func.get("name","~empty~"))
print "file: %s (line %s)" % (func.get("file","~empty~"), func.get("line","~empty~"))
print "is class: %s" % func.get("isclass",False)
print "is struct: %s" % func.get("isstruct",False)
print "class: %s" % (func.get("class","~empty~"))
print "method: %s" % (func.get("method","~empty~"))
print "brief: %s" % (func.get("brief","~empty~"))
if "decls" in func:
print "declarations:"
for d in func["decls"]:
print " %7s: %s" % (d[0], re.sub(r"[ \t]+", " ", d[1]))
if "params" in func:
print "parameters:"
for name, comment in func["params"].items():
print "%23s: %s" % (name, comment)
if not skipped:
print "long: %s" % (func.get("long","~empty~"))
def validate(self, func):
if func.get("decls",None) is None:
if not func.get("isclass",False):
return False
if func["name"] in self.definitions:
print "Parser error: function/class/struct \"%s\" in %s line %s is already documented in %s line %s" \
% (func["name"], func["file"], func["line"], self.definitions[func["name"]]["file"], self.definitions[func["name"]]["line"])
return False
#todo: validate parameter names
return True
def normalize(self, func):
if not func:
return func
func["name"] = self.normalizeText(func["name"])
if "method" in func:
func["method"] = self.normalizeText(func["method"])
if "class" in func:
func["class"] = self.normalizeText(func["class"])
if "brief" in func:
func["brief"] = self.normalizeText(func.get("brief",None))
if not func["brief"]:
del func["brief"]
if "long" in func:
func["long"] = self.normalizeText(func.get("long",None))
if not func["long"]:
del func["long"]
if "decls" in func:
func["decls"].sort()
if "params" in func:
params = {}
for name, comment in func["params"].items():
cmt = self.normalizeText(comment)
if cmt:
params[name] = cmt
func["params"] = params
return func
def normalizeText(self, s):
if s is None:
return s
# normalize line endings
s = re.sub(r"\r\n", "\n", s)
# remove tailing ::
s = re.sub(r"::$", "\n", s)
# remove extra line breaks before/after _ or ,
s = re.sub(r"\n[ \t]*([_,])\n", r"\1", s)
# remove extra line breaks after `
#s = re.sub(r"`\n", "` ", s)
# remove extra line breaks before *
s = re.sub(r"\n\n\*", "\n\*", s)
# remove extra line breaks before #.
s = re.sub(r"\n\n#\.", "\n#.", s)
# remove extra line breaks after #.
s = re.sub(r"\n#\.\n", "\n#. ", s)
# remove extra line breaks before `
s = re.sub(r"\n[ \t]*`", " `", s)
# remove trailing whitespaces
s = re.sub(r"[ \t]+$", "", s)
# remove whitespace before .
s = re.sub(r"[ \t]+\.", "\.", s)
# remove .. for references
s = re.sub(r"\.\. \[", "[", s)
# unescape
s = re.sub(r"\\(.)", "\\1", s)
# compress whitespace
s = re.sub(r"[ \t]+", " ", s)
s = s.replace("**", "")
s = s.replace("``", "\"")
s = s.replace("`", "\"")
s = s.replace("\"\"", "\"")
s = s.replace(":ocv:cfunc:","")
s = s.replace(":math:", "")
s = s.replace(":ocv:class:", "")
s = s.replace(":ocv:func:", "")
s = s.replace("]_", "]")
s = s.strip()
return s
if __name__ == "__main__":
    # Command-line entry point: parse the documentation of one module.
    # sys.argv[0] is the script itself, so the module path is present only
    # when there are at least two entries.
    if len(sys.argv) < 2:
        print("Usage:\n%s <module path>" % os.path.basename(sys.argv[0]))
        sys.exit(0)

    # hdr_parser lives next to the Python bindings generator; make it importable
    rst_parser_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    hdr_parser_path = os.path.join(rst_parser_dir, "../python/src2")
    sys.path.append(hdr_parser_path)
    import hdr_parser

    # the module argument is resolved relative to the parent of this script's directory
    module = sys.argv[1]
    module_dir = os.path.join(rst_parser_dir, "../" + module)
    if not os.path.isdir(module_dir):
        print("Module \"" + module + "\" could not be found.")
        sys.exit(1)

    parser = RstParser(hdr_parser.CppHeaderParser())
    parser.parse(module_dir)