Add symbol checking script to libc++ to help manage exported symbols.

Summary:
Add symbol checking scripts for extracting a list of symbols from shared libraries and for comparing symbol lists for differences.



Reviewers: mclow.lists, danalbert, EricWF

Reviewed By: EricWF

Subscribers: majnemer, emaste, cfe-commits

Differential Revision: http://reviews.llvm.org/D4946

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@232855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Fiselier 2015-03-20 22:09:29 +00:00
parent ffbfbcda66
commit 2d9feb59f0
10 changed files with 492 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# all guard variables
_ZGVNSt3__
# all vtables
_ZTV
# all VTT
_ZTT
# all non-virtual thunks
_ZTh
# all virtual thunks
_ZTv
# typeinfo for std::__1::__types
# There are no std::__types
_ZTINSt3__1[0-9][0-9]*__
# typeinfo name for std::__1::__types
_ZTSNSt3__1[0-9][0-9]*__
# anything using __hidden_allocator
.*__hidden_allocator
# anything using __sso_allocator
.*__sso_allocator

View File

@ -0,0 +1,19 @@
# all guard variables
__ZGVNSt3__
# all vtables
__ZTV
# all VTT
__ZTT
# all non-virtual thunks
__ZTh
# all virtual thunks
__ZTv
# typeinfo for std::__1::__types
# There are no std::__types
__ZTINSt3__1[0-9][0-9]*__
# typeinfo name for std::__1::__types
__ZTSNSt3__1[0-9][0-9]*__
# anything using __hidden_allocator
.*__hidden_allocator
# anything using __sso_allocator
.*__sso_allocator

View File

@ -0,0 +1,8 @@
"""libcxx abi symbol checker"""

__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 1, 0)
# Dotted version string built from __versioninfo__, i.e. '0.1.0dev'.
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'

# Submodules pulled in by ``from sym_check import *``.
__all__ = ['diff', 'extract', 'match', 'util']

View File

@ -0,0 +1,93 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
"""
diff - A set of functions for diff-ing two symbol lists.
"""
from sym_check import util
def _symbol_difference(lhs, rhs):
lhs_names = set((n['name'] for n in lhs))
rhs_names = set((n['name'] for n in rhs))
diff_names = lhs_names - rhs_names
return [n for n in lhs if n['name'] in diff_names]
def _find_by_key(sym_list, k):
for sym in sym_list:
if sym['name'] == k:
return sym
return None
def added_symbols(old, new):
    """
    Return the symbols of new whose names are not present in old.
    """
    return _symbol_difference(lhs=new, rhs=old)
def removed_symbols(old, new):
    """
    Return the symbols of old whose names are not present in new.
    """
    return _symbol_difference(lhs=old, rhs=new)
def changed_symbols(old, new):
    """
    Return a list of (old_sym, new_sym) pairs for symbols that exist under
    the same name in both lists but whose entries differ.

    A symbol of old is skipped when an identical entry exists in new. The
    counterpart is the first entry of new with the same name, and it is only
    reported when that entry does not itself appear in old.
    """
    # Index new by name once, keeping the first entry for each name.
    new_by_name = {}
    for sym in new:
        new_by_name.setdefault(sym['name'], sym)

    changed = []
    for old_sym in old:
        if old_sym in new:
            continue
        new_sym = new_by_name.get(old_sym['name'])
        # Plain inequality instead of cmp(): cmp() is not a builtin on
        # Python 3 and would raise NameError here.
        if (new_sym is not None and new_sym not in old
                and old_sym != new_sym):
            changed.append((old_sym, new_sym))
    return changed
def diff(old, new):
    """
    Compare two symbol lists and return the tuple (added, removed, changed).
    """
    return (added_symbols(old, new),
            removed_symbols(old, new),
            changed_symbols(old, new))
def report_diff(added_syms, removed_syms, changed_syms, names_only=False,
                demangle=True):
    """
    Build a human readable report for the result of a symbol diff.

    added_syms and removed_syms are lists of symbols; changed_syms is a list
    of (old_sym, new_sym) pairs. With names_only set, only symbol names are
    printed and changed symbols are neither reported nor counted as a break.
    With demangle set, names are passed through util.demangle_symbol.

    Returns (report, status). status is 1 when the diff is an ABI break
    (symbols removed or, unless names_only, changed) and 0 otherwise, so it
    can be used directly as a process exit code.
    """
    def maybe_demangle(name):
        return util.demangle_symbol(name) if demangle else name

    parts = []
    for sym in added_syms:
        parts.append('Symbol added: %s\n' % maybe_demangle(sym['name']))
        if not names_only:
            parts.append(' %s\n\n' % sym)
    if added_syms and names_only:
        parts.append('\n')
    for sym in removed_syms:
        parts.append('SYMBOL REMOVED: %s\n' % maybe_demangle(sym['name']))
        if not names_only:
            parts.append(' %s\n\n' % sym)
    if removed_syms and names_only:
        parts.append('\n')
    if not names_only:
        for old_sym, new_sym in changed_syms:
            old_str = '\n OLD SYMBOL: %s' % old_sym
            new_str = '\n NEW SYMBOL: %s' % new_sym
            parts.append('SYMBOL CHANGED: %s%s%s\n\n' %
                         (maybe_demangle(old_sym['name']),
                          old_str, new_str))

    added = bool(added_syms)
    # Keep this a strict boolean: it is returned as the status below, and a
    # raw count could wrap around to 0 once truncated to an exit code.
    abi_break = bool(removed_syms)
    if not names_only:
        abi_break = abi_break or bool(changed_syms)

    if added or abi_break:
        parts.append('Summary\n')
        parts.append(' Added: %d\n' % len(added_syms))
        parts.append(' Removed: %d\n' % len(removed_syms))
        if not names_only:
            parts.append(' Changed: %d\n' % len(changed_syms))
        if not abi_break:
            parts.append('Symbols added.')
        else:
            parts.append('ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!')
    else:
        parts.append('Symbols match.')
    return ''.join(parts), int(abi_break)

View File

@ -0,0 +1,101 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
"""
extract - A set of functions that extract symbol lists from shared libraries.
"""
import distutils.spawn
import sys
from sym_check import util
class Extractor(object):
    """
    Extractor - Runs nm over a library and turns its output into symbols.
    """

    @staticmethod
    def find_nm():
        """
        Look up the nm executable and return its path. When nm cannot be
        found an error is printed and the process exits with status 1.
        """
        nm_path = distutils.spawn.find_executable('nm')
        if nm_path is None:
            # ERROR no NM found
            print("ERROR: Could not find nm")
            sys.exit(1)
        return nm_path

    def __init__(self):
        """
        Record the nm executable and the flags passed to it on every
        extraction.
        """
        self.nm_exe = Extractor.find_nm()
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Run nm on lib and return the wanted symbols, de-duplicated and
        sorted by their repr, as parsed by util.read_syms_from_list.
        Raises RuntimeError when nm exits with a non-zero status.
        """
        nm_cmd = [self.nm_exe] + self.flags + [lib]
        out, _, exit_code = util.execute_command_verbose(nm_cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        # Symbols are dicts and so not hashable; their repr is used to
        # de-duplicate and sort them, then parsed back into symbols.
        unique_reprs = set()
        for line in out.splitlines():
            if not line.strip():
                continue
            sym = self._extract_sym(line)
            if self._want_sym(sym):
                unique_reprs.add(repr(sym))
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict, or return None when
        the line has fewer than two columns.
        """
        columns = sym_str.split()
        # Everything we want has at least two columns.
        if len(columns) < 2:
            return None
        sym = self._transform_sym_type({
            'name': columns[0],
            'type': columns[1],
        })
        # Only OBJECT symbols keep their size (fourth column, hexadecimal).
        if len(columns) > 3 and sym['type'] == 'OBJECT':
            sym['size'] = int(columns[3], 16)
        return sym

    @staticmethod
    def _want_sym(sym):
        """
        Return True when sym is a parsed symbol whose type is not one of the
        lower-case nm type letters that are discarded.
        """
        unwanted_types = ('t', 'b', 'r', 'd', 'w')
        return (sym is not None and len(sym) >= 2
                and sym['type'] not in unwanted_types)

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite the single letter nm type of sym to FUNC or OBJECT, in
        place, and return sym. Unrecognized types are left as they are.
        """
        nm_type = sym['type']
        if nm_type in ('T', 'W'):
            sym['type'] = 'FUNC'
        elif nm_type in ('B', 'D', 'R', 'V', 'S'):
            sym['type'] = 'OBJECT'
        return sym
def extract_symbols(lib_file):
    """
    Extract and return the list of symbols found in a dynamic library.
    The symbols are read with nm through a fresh Extractor, which filters,
    formats and de-duplicates them.
    """
    return Extractor().extract(lib_file)

View File

@ -0,0 +1,32 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
"""
match - A set of functions for matching symbols in a list to a list of regexes
"""
import re
def find_and_report_matching(symbol_list, regex_list):
    """
    Match every regex in regex_list against symbol_list and return
    (found_count, report): the total number of matches over all regexes and
    a text report listing the matching symbol names for each regex.
    """
    total = 0
    chunks = []
    for pattern in regex_list:
        chunks.append('Matching regex "%s":\n' % pattern)
        hits = find_matching_symbols(symbol_list, pattern)
        if hits:
            total += len(hits)
            chunks.extend(' MATCHES: %s\n' % sym['name'] for sym in hits)
            chunks.append('\n')
        else:
            chunks.append(' No matches found\n\n')
    return total, ''.join(chunks)
def find_matching_symbols(symbol_list, regex_str):
    """
    Return the symbols of symbol_list whose 'name' matches regex_str.
    The regex is anchored at the start of the name (re.match semantics).
    """
    matches = re.compile(regex_str).match
    return [sym for sym in symbol_list if matches(sym['name'])]

View File

@ -0,0 +1,102 @@
import ast
import distutils.spawn
import signal
import subprocess
import sys
def execute_command(cmd, input_str=None):
    """
    Execute a command, capture and return its output.

    cmd is the argument list to run and input_str, when given, is text
    written to the command's standard input. Returns the tuple
    (stdout, stderr, exit_code) with both streams as text. Raises
    KeyboardInterrupt when the command was terminated by SIGINT.
    """
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text mode pipes: without this, Python 3 rejects a str input_str
        # and hands back bytes that callers cannot treat as strings.
        universal_newlines=True)
    out, err = proc.communicate(input=input_str)
    # communicate() has already waited for the process to finish.
    exit_code = proc.returncode
    if exit_code == -signal.SIGINT:
        raise KeyboardInterrupt
    return out, err, exit_code
def execute_command_verbose(cmd, input_str=None):
    """
    Run a command through execute_command and return its
    (stdout, stderr, exit_code). When the exit code is non-zero, a report
    with the command line, exit code and any captured output is written to
    sys.stderr first.
    """
    out, err, exit_code = execute_command(cmd, input_str=input_str)
    if exit_code == 0:
        return out, err, exit_code

    quoted_cmd = ' '.join("'%s'" % arg for arg in cmd)
    sections = ["Command: %s\n" % quoted_cmd,
                "Exit Code: %d\n" % exit_code]
    if out:
        sections.append("Standard Output:\n--\n%s--" % out)
    if err:
        sections.append("Standard Error:\n--\n%s--" % err)
    sections.append("\n\nFailed!")
    sys.stderr.write('%s\n' % ''.join(sections))
    return out, err, exit_code
def read_syms_from_list(slist):
    """
    Turn a list of strings into a list of symbols.
    Every string holds the Python literal for exactly one symbol and is
    parsed with ast.literal_eval.
    """
    return list(map(ast.literal_eval, slist))
def read_syms_from_file(filename):
    """
    Load a symbol list from a file that stores one symbol per line.
    """
    with open(filename, 'r') as sym_file:
        sym_lines = sym_file.read().splitlines()
    return read_syms_from_list(sym_lines)
def read_blacklist(filename):
    """
    Read a blacklist file and return its entries as a list of strings.
    Surrounding whitespace is stripped from every line; blank lines and
    lines starting with '#' are dropped.
    """
    with open(filename, 'r') as blacklist_file:
        contents = blacklist_file.read()
    entries = []
    for raw_line in contents.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith('#'):
            entries.append(entry)
    return entries
def write_syms(sym_list, out=None, names_only=False):
    """
    Write a list of symbols, one per line, to the file named by out, or to
    sys.stdout when out is None. With names_only set, only the symbol names
    are written, in sorted order.
    """
    if names_only:
        entries = sorted(sym['name'] for sym in sym_list)
    else:
        entries = sym_list
    text = ''.join('%s\n' % entry for entry in entries)
    if out is not None:
        with open(out, 'w') as out_file:
            out_file.write(text)
    else:
        sys.stdout.write(text)
# Path of the c++filt executable, looked up once when this module is
# imported. It is None when c++filt cannot be found, in which case
# demangle_symbol hands symbol names back unchanged.
_cppfilt_exe = distutils.spawn.find_executable('c++filt')
def demangle_symbol(symbol):
    """
    Return symbol demangled by c++filt. The symbol is returned unchanged
    when c++filt is not available or exits with a non-zero status.
    """
    if _cppfilt_exe is not None:
        out, _, exit_code = execute_command_verbose(
            [_cppfilt_exe], input_str=symbol)
        if exit_code == 0:
            return out
    return symbol
def extract_or_load(filename):
    """
    Return the symbol list for filename.

    Files ending in '.so' or '.dylib' are treated as libraries and have
    their symbols extracted; any other file is read as a stored symbol
    list.
    """
    if filename.endswith(('.so', '.dylib')):
        # 'import sym_check.extract' would only bind the name 'sym_check',
        # leaving 'extract' undefined; import the submodule by name instead.
        # NOTE(review): kept function-local, presumably because
        # sym_check.extract imports this module - confirm.
        from sym_check import extract
        return extract.extract_symbols(filename)
    return read_syms_from_file(filename)

47
utils/sym_check/sym_diff.py Executable file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python
"""
sym_diff - Compare two symbol lists and output the differences.
"""
from argparse import ArgumentParser
import sys
from sym_check import diff, util
def main():
    """
    Parse the command line, diff the old and new symbol lists, and print
    the report or write it to the requested output file. The process exit
    status is the break indicator returned by diff.report_diff.
    """
    parser = ArgumentParser(
        description='Compare two symbol lists and output the differences.')
    parser.add_argument(
        '--names-only', dest='names_only',
        help='Only print symbol names',
        action='store_true', default=False)
    parser.add_argument(
        '-o', '--output', dest='output',
        help='The output file. stdout is used if not given',
        type=str, action='store', default=None)
    parser.add_argument(
        '--demangle', dest='demangle',
        help='Demangle symbol names in the report',
        action='store_true', default=False)
    parser.add_argument(
        'old_syms', metavar='old-syms', type=str,
        help='The file containing the old symbol list or a library')
    parser.add_argument(
        'new_syms', metavar='new-syms', type=str,
        help='The file containing the new symbol list or a library')
    args = parser.parse_args()

    old_syms_list = util.extract_or_load(args.old_syms)
    new_syms_list = util.extract_or_load(args.new_syms)

    added, removed, changed = diff.diff(old_syms_list, new_syms_list)
    report, is_break = diff.report_diff(added, removed, changed,
                                        names_only=args.names_only,
                                        demangle=args.demangle)
    if args.output is None:
        print(report)
    else:
        with open(args.output, 'w') as f:
            f.write(report + '\n')
    sys.exit(is_break)


if __name__ == '__main__':
    main()

29
utils/sym_check/sym_extract.py Executable file
View File

@ -0,0 +1,29 @@
#!/usr/bin/env python
"""
sym_extract - Extract and output a list of symbols from a shared library.
"""
from argparse import ArgumentParser
from sym_check import extract, util
def main():
    """
    Parse the command line, extract the symbols of the given library and
    write them to the output file, or to stdout when none is given.
    """
    arg_parser = ArgumentParser(
        description='Extract a list of symbols from a shared library.')
    arg_parser.add_argument(
        'library', metavar='shared-lib', type=str,
        help='The library to extract symbols from')
    arg_parser.add_argument(
        '-o', '--output', dest='output',
        help='The output file. stdout is used if not given',
        type=str, action='store', default=None)
    arg_parser.add_argument(
        '--names-only', dest='names_only',
        help='Output only the name of the symbol',
        action='store_true', default=False)
    opts = arg_parser.parse_args()

    # Only announce the destination when it is a file, so that a symbol
    # list written to stdout is not mixed with this message.
    if opts.output is not None:
        print('Extracting symbols from %s to %s.'
              % (opts.library, opts.output))
    extracted = extract.extract_symbols(opts.library)
    util.write_syms(extracted, out=opts.output, names_only=opts.names_only)


if __name__ == '__main__':
    main()

42
utils/sym_check/sym_match.py Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
"""
sym_match - Match all symbols in a list against a list of regexes.
"""
from argparse import ArgumentParser
import sys
from sym_check import util, match, extract
def main():
    """
    Parse the command line, match the symbol list against the regexes given
    directly or read from a blacklist file, and print the matching report.
    Exits with status 1 when neither regexes nor a blacklist are given.
    """
    parser = ArgumentParser(
        description='Match all symbols in a list against a list of regexes.')
    parser.add_argument(
        '--blacklist', dest='blacklist',
        help='A file containing the regexes to match. When given, it is '
             'used instead of any regexes on the command line',
        type=str, action='store', default=None)
    parser.add_argument(
        'symbol_list', metavar='symbol_list', type=str,
        help='The file containing the symbol list or a library')
    parser.add_argument(
        'regexes', metavar='regexes', default=[], nargs='*',
        help='The regexes to match the symbol names against')
    args = parser.parse_args()

    if not args.regexes and args.blacklist is None:
        sys.stderr.write('Either a regex or a blacklist must be specified.\n')
        sys.exit(1)
    # A blacklist file takes precedence over regexes on the command line.
    if args.blacklist:
        search_list = util.read_blacklist(args.blacklist)
    else:
        search_list = args.regexes

    symbol_list = util.extract_or_load(args.symbol_list)

    matching_count, report = match.find_and_report_matching(
        symbol_list, search_list)
    sys.stdout.write(report)
    if matching_count != 0:
        print('%d matching symbols found...' % matching_count)


if __name__ == '__main__':
    main()