bionic/libc/tools/generate-NOTICE.py
Elliott Hughes 5d2f86f363 Fix generate-NOTICE to cope better with BSD __COPYRIGHT macros.
Change-Id: I99f9d2e0a28342663cec6aed483e1a23c12e5e87
2014-07-23 11:10:48 -07:00

160 lines
5.1 KiB
Python
Executable File

#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile
def IsUninteresting(path):
    """Return True if `path` names a file this script skips.

    The comparison is case-insensitive: the path is lower-cased and then
    matched against a fixed list of suffixes (build files, Python files,
    text files, section-3 man pages, NOTICE/README/CAVEATS files and the
    time zone data files).
    """
    lowered = path.lower()

    skipped_extensions = (".mk", ".py", ".pyc", ".txt", ".3")
    skipped_basenames = ("/notice", "/readme", "/caveats")
    skipped_tz_files = ("/tzdata", "/zoneinfo/generate")

    # str.endswith accepts a tuple and is True if any one suffix matches.
    return lowered.endswith(
        skipped_extensions + skipped_basenames + skipped_tz_files)
def IsAutoGenerated(content):
    """Return True if `content` carries one of the known generator banners.

    The match is a case-sensitive substring search for the banners left by
    gensyscalls.py, genserv.py, or the kernel header clean-up.
    """
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)
# Every distinct copyright notice collected so far, across all scanned files.
copyrights = set()

# Substrings that mark the end of a copyright header: mostly BSD version-control
# tags, which change from revision to revision and must not end up in the
# notice text.
_HEADER_TERMINATORS = (
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Comment lines that are only decoration and carry no notice text.
_TRAILING_CRUFT = (" *", " * ====================================================")


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    lines -- the file's content, already split into lines.
    i     -- index of a line that mentions a copyright.

    For '#'-style comments the header starts at lines[i]; otherwise it starts
    on the line after the nearest preceding line containing "/*" (or at the
    top of the file if there is none).  The header runs until a "*/", a
    version-control tag, or (for '#' comments) an empty line.  Comment
    formatting is stripped and the resulting text is added to the module-level
    `copyrights` set.

    Returns the index of the line that terminated the header (len(lines) if
    the file ended first), so the caller can resume scanning from there.

    If nothing is left after clean-up (for example a one-line
    "/* Copyright ... */" comment at the top of a file), a warning is written
    to stderr and nothing is added, instead of failing with an IndexError.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0:
            if "/*" in lines[start - 1]:
                break
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        if is_hash_comment and len(current) == 0:
            break
        if any(terminator in current for terminator in _HEADER_TERMINATORS):
            break
        i += 1

    end = i

    # Trim trailing cruft, but never walk back past the start of the header.
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if not clean_lines:
        sys.stderr.write(
            'warning: empty copyright header near line %d\n' % (start + 1))
        return i

    copyrights.add("\n".join(clean_lines))

    return i
args = sys.argv[1:]
if len(args) == 0:
args = [ "." ]
for arg in args:
sys.stderr.write('Searching for source files in "%s"...\n' % arg)
for directory, sub_directories, filenames in os.walk(arg):
if ".git" in sub_directories:
sub_directories.remove(".git")
sub_directories = sorted(sub_directories)
for filename in sorted(filenames):
path = os.path.join(directory, filename)
if IsUninteresting(path):
#print "ignoring uninteresting file %s" % path
continue
try:
content = open(path, 'r').read().decode('utf-8')
except:
# TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
content = open(path, 'r').read().decode('iso-8859-1')
lines = content.split("\n")
if len(lines) <= 4:
#print "ignoring short file %s" % path
continue
if IsAutoGenerated(content):
#print "ignoring auto-generated file %s" % path
continue
if not "Copyright" in content:
if "public domain" in content.lower():
#print "ignoring public domain file %s" % path
continue
sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
continue
i = 0
while i < len(lines):
if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
i = ExtractCopyrightAt(lines, i)
i += 1
#print path
for copyright in sorted(copyrights):
print copyright.encode('utf-8')
print
print '-------------------------------------------------------------------'
print
sys.exit(0)