From 2688e22cb595c6b652538c36dacf2bb35cc58ac3 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 20 Jun 2013 19:34:32 +0400 Subject: [PATCH 1/3] Made xls-report.py use global properties in XML files. Now it can determine, without looking at the file name, both the module name and the configuration name (the latter with a little help from the configuration file). --- modules/ts/misc/testlog_parser.py | 48 +++++++++++++++---- modules/ts/misc/xls-report.py | 79 ++++++++++++++++++++++--------- 2 files changed, 96 insertions(+), 31 deletions(-) diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 8ab21417c..5d478645b 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -1,6 +1,9 @@ #!/usr/bin/env python -import sys, re, os.path +import collections +import re +import os.path +import sys from xml.dom.minidom import parse class TestInfo(object): @@ -159,12 +162,31 @@ class TestInfo(object): return 1 return 0 +# This is a Sequence for compatibility with old scripts, +# which treat parseLogFile's return value as a list. 
+class TestRunInfo(collections.Sequence): + def __init__(self, properties, tests): + self.properties = properties + self.tests = tests + + def __len__(self): + return len(self.tests) + + def __getitem__(self, key): + return self.tests[key] + def parseLogFile(filename): - tests = [] log = parse(filename) - for case in log.getElementsByTagName("testcase"): - tests.append(TestInfo(case)) - return tests + + properties = { + attr_name[3:]: attr_value + for (attr_name, attr_value) in log.documentElement.attributes.items() + if attr_name.startswith('cv_') + } + + tests = map(TestInfo, log.getElementsByTagName("testcase")) + + return TestRunInfo(properties, tests) if __name__ == "__main__": @@ -173,8 +195,18 @@ if __name__ == "__main__": exit(0) for arg in sys.argv[1:]: - print "Tests found in", arg - tests = parseLogFile(arg) - for t in sorted(tests): + print "Processing {}...".format(arg) + + run = parseLogFile(arg) + + print "Properties:" + + for (prop_name, prop_value) in run.properties.items(): + print "\t{} = {}".format(prop_name, prop_value) + + print "Tests:" + + for t in sorted(run.tests): t.dump() + print diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index e79bb123d..a3cf8daca 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -3,6 +3,7 @@ from __future__ import division import ast +import fnmatch import logging import numbers import os, os.path @@ -45,15 +46,55 @@ no_speedup_style = no_time_style error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True') -def collect_xml(collection, configuration, xml_fullname): - xml_fname = os.path.split(xml_fullname)[1] - module = xml_fname[:xml_fname.index('_')] +class Collector(object): + def __init__(self, config_match_func): + self.__config_cache = {} + self.config_match_func = config_match_func + self.tests = {} - module_tests = 
collection.setdefault(module, OrderedDict()) + def collect_from(self, xml_path): + run = parseLogFile(xml_path) - for test in sorted(parseLogFile(xml_fullname)): - test_results = module_tests.setdefault((test.shortName(), test.param()), {}) - test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status + module = run.properties['module_name'] + + properties = run.properties.copy() + del properties['module_name'] + + props_key = tuple(sorted(properties.iteritems())) # dicts can't be keys + + if props_key in self.__config_cache: + configuration = self.__config_cache[props_key] + else: + configuration = self.config_match_func(properties) + + if configuration is None: + logging.warning('failed to match properties to a configuration: %r', props_key) + else: + same_config_props = [it[0] for it in self.__config_cache.iteritems() if it[1] == configuration] + if len(same_config_props) > 0: + logging.warning('property set %r matches the same configuration %r as property set %r', + props_key, configuration, same_config_props[0]) + + self.__config_cache[props_key] = configuration + + if configuration is None: return + + module_tests = self.tests.setdefault(module, OrderedDict()) + + for test in run.tests: + test_results = module_tests.setdefault((test.shortName(), test.param()), {}) + test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status + +def make_match_func(matchers): + def match_func(properties): + for matcher in matchers: + if all(properties.get(name) == value + for (name, value) in matcher['properties'].iteritems()): + return matcher['name'] + + return None + + return match_func def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') @@ -83,23 +124,15 @@ def main(): sheet_conf = dict(global_conf.items() + sheet_conf.items()) - if 'configurations' in sheet_conf: - config_names = sheet_conf['configurations'] - else: - try: - config_names = [p for p in os.listdir(sheet_path) - if 
os.path.isdir(os.path.join(sheet_path, p))] - except Exception as e: - logging.warning('error while determining configuration names for %s: %s', sheet_path, e) - continue + config_names = sheet_conf.get('configurations', []) + config_matchers = sheet_conf.get('configuration_matchers', []) - collection = {} + collector = Collector(make_match_func(config_matchers)) - for configuration, configuration_path in \ - [(c, os.path.join(sheet_path, c)) for c in config_names]: - logging.info('processing %s', configuration_path) - for xml_fullname in glob(os.path.join(configuration_path, '*.xml')): - collect_xml(collection, configuration, xml_fullname) + for root, _, filenames in os.walk(sheet_path): + logging.info('looking in %s', root) + for filename in fnmatch.filter(filenames, '*.xml'): + collector.collect_from(os.path.join(root, filename)) sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))) @@ -126,7 +159,7 @@ def main(): module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) for module, color in module_colors.iteritems()} - for module, tests in sorted(collection.iteritems()): + for module, tests in sorted(collector.tests.iteritems()): for ((test, param), configs) in tests.iteritems(): sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) sheet.write(row, 1, test) From 0e3a9eaf980b484a9d5f56c0f38c92164e9c5910 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 21 Jun 2013 13:43:16 +0400 Subject: [PATCH 2/3] Made Collector render property sets as dicts instead of tuples of pairs. 
--- modules/ts/misc/xls-report.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index a3cf8daca..2dcbf89cf 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -52,6 +52,12 @@ class Collector(object): self.config_match_func = config_match_func self.tests = {} + # Format a sorted sequence of pairs as if it was a dictionary. + # We can't just use a dictionary instead, since we want to preserve the sorted order of the keys. + @staticmethod + def __format_config_cache_key(pairs): + return '{' + ', '.join(repr(k) + ': ' + repr(v) for (k, v) in pairs) + '}' + def collect_from(self, xml_path): run = parseLogFile(xml_path) @@ -68,12 +74,15 @@ class Collector(object): configuration = self.config_match_func(properties) if configuration is None: - logging.warning('failed to match properties to a configuration: %r', props_key) + logging.warning('failed to match properties to a configuration: %s', + Collector.__format_config_cache_key(props_key)) else: same_config_props = [it[0] for it in self.__config_cache.iteritems() if it[1] == configuration] if len(same_config_props) > 0: - logging.warning('property set %r matches the same configuration %r as property set %r', - props_key, configuration, same_config_props[0]) + logging.warning('property set %s matches the same configuration %r as property set %s', + Collector.__format_config_cache_key(props_key), + configuration, + Collector.__format_config_cache_key(same_config_props[0])) self.__config_cache[props_key] = configuration From d4a8b87645f6df2ee5a61c8b0c52a4248d2c600a Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 21 Jun 2013 16:45:17 +0400 Subject: [PATCH 3/3] Wrote relevant docs. 
--- modules/ts/misc/xls-report.py | 79 ++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index 2dcbf89cf..e911314e9 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -1,5 +1,69 @@ #!/usr/bin/env python +""" + This script can generate XLS reports from OpenCV tests' XML output files. + + To use it, first, create a directory for each machine you ran tests on. + Each such directory will become a sheet in the report. Put each XML file + into the corresponding directory. + + Then, create your configuration file(s). You can have a global configuration + file (specified with the -c option), and per-sheet configuration files, which + must be called sheet.conf and placed in the directory corresponding to the sheet. + The settings in the per-sheet configuration file will override those in the + global configuration file, if both are present. + + A configuration file must consist of a Python dictionary. The following keys + will be recognized: + + * 'comparisons': [{'from': string, 'to': string}] + List of configurations to compare performance between. For each item, + the sheet will have a column showing speedup from configuration named + 'from' to configuration named 'to'. + + * 'configuration_matchers': [{'properties': {string: object}, 'name': string}] + Instructions for matching test run property sets to configuration names. + + For each found XML file: + + 1) All attributes of the root element starting with the prefix 'cv_' are + placed in a dictionary, with the cv_ prefix stripped and the cv_module_name + element deleted. + + 2) The first matcher for which the XML file's property set contains the same + keys with equal values as its 'properties' dictionary is searched for. + A missing property can be matched by using None as the value. + + Corollary 1: you should place more specific matchers before less specific + ones. 
+ + Corollary 2: an empty 'properties' dictionary matches every property set. + + 3) If a matching matcher is found, its 'name' string is presumed to be the name + of the configuration the XML file corresponds to. Otherwise, a warning is + printed. A warning is also printed if two different property sets match to the + same configuration name. + + * 'configurations': [string] + List of names for compile-time and runtime configurations of OpenCV. + Each item will correspond to a column of the sheet. + + * 'module_colors': {string: string} + Mapping from module name to color name. In the sheet, cells containing module + names from this mapping will be colored with the corresponding color. You can + find the list of available colors here: + <http://www.simplistix.co.uk/presentations/python-excel.pdf>. + + * 'sheet_name': string + Name for the sheet. If this parameter is missing, the name of the sheet's directory + will be used. + + Note that all keys are optional, although to get useful results, you'll want to + specify at least 'configurations' and 'configuration_matchers'. + + Finally, run the script. Use the --help option for usage information. +""" + from __future__ import division import ast @@ -18,21 +82,6 @@ import xlwt from testlog_parser import parseLogFile -# To build XLS report you neet to put your xmls (OpenCV tests output) in the -# following way: -# -# "root" --- folder, representing the whole XLS document. It contains several -# subfolders --- sheet-paths of the XLS document. Each sheet-path contains it's -# subfolders --- config-paths. Config-paths are columns of the sheet and -# they contains xmls files --- output of OpenCV modules testing. -# Config-path means OpenCV build configuration, including different -# options such as NEON, TBB, GPU enabling/disabling. 
-# -# root -# root\sheet_path -# root\sheet_path\configuration1 (column 1) -# root\sheet_path\configuration2 (column 2) - re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE) re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE)