Auto-generate a complete NOTICE file.
Remove the hand-collated ones, and switch to a script that pulls the copyright headers out of every file and collects the unique ones. Change-Id: Ied3b98b3f56241df97166c410ff81de4e0157c9d
This commit is contained in:
147
libc/tools/generate-NOTICE.py
Executable file
147
libc/tools/generate-NOTICE.py
Executable file
@@ -0,0 +1,147 @@
|
||||
#!/usr/bin/python
|
||||
# Run with directory arguments from any directory, with no special setup required.
|
||||
|
||||
import ftplib
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import tempfile
|
||||
|
||||
def IsUninteresting(path):
    """Return True if `path` names a file that is never scanned for copyrights.

    The comparison is case-insensitive. A file is uninteresting when it has
    one of the skipped extensions (build files, Python scripts, text files,
    man pages) or when its final path component(s) name one of the skipped
    files (NOTICE, README, CAVEATS and the zoneinfo data/generator files).
    """
    skipped_extensions = (".mk", ".py", ".pyc", ".txt", ".3")
    skipped_names = (
        "/notice",
        "/readme",
        "/caveats",
        "/zoneinfo.dat",
        "/zoneinfo.idx",
        "/zoneinfo.version",
        "/zoneinfo/generate",
    )

    # Anchor the path with a leading "/" so that a bare file name such as
    # "NOTICE" is matched by the same "/name" suffixes as "dir/NOTICE".
    # Paths that already contain a directory are unaffected by the prefix.
    anchored = "/" + path.lower()

    # str.endswith accepts a tuple of candidate suffixes.
    return anchored.endswith(skipped_extensions) or anchored.endswith(skipped_names)
|
||||
|
||||
def IsAutoGenerated(content):
    """Return True if `content` carries one of the known auto-generation banners.

    The banners are the ones left by gensyscalls.py, genserv.py and the
    kernel header generator; the match is an exact, case-sensitive
    substring search over the whole file text.
    """
    banners = (
        "generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in banners)
|
||||
|
||||
# Every distinct copyright notice collected so far, with comment markup removed.
copyrights = set()


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header starting at lines[i] into `copyrights`.

    Lines are consumed from index `i` until one terminates the header: a
    line containing "*/", an empty line when the header started with "#",
    or a line containing one of the known version-control id markers.
    Trailing decoration lines are dropped, comment markup is stripped from
    the remaining lines, blank lines are trimmed from both ends, and the
    resulting text is added to the module-level `copyrights` set.

    A header that is opened and closed on the same line
    (e.g. "/* Copyright ... */") is recorded from the text before "*/".
    If nothing remains after cleaning, nothing is added.

    Args:
        lines: the file's content as a list of lines without newlines.
        i: index of the line at which the header starts.

    Returns:
        The index of the line that terminated the header, or len(lines)
        if the header ran to the end of the file.
    """
    is_hash_comment = lines[i].startswith("#")

    # Substrings that mark the end of a copyright header.
    terminators = (
        "*/",
        "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
        "\tcitrus Id: ",
        "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
        "$FreeBSD$", "$Citrus$",
    )

    # Read comment lines until we hit something that terminates a
    # copyright header.
    start = i
    while i < len(lines):
        current = lines[i]
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in terminators):
            break
        i += 1

    # Trim trailing cruft, but never walk back past the header's first line.
    cruft = (" *", " * ====================================================")
    end = i
    while end > start and lines[end - 1] in cruft:
        end -= 1

    header_lines = lines[start:end]
    if not header_lines and "*/" in lines[start]:
        # The comment closes on the line it starts on; keep the text in
        # front of the terminator, provided that is where the notice is.
        head = lines[start].split("*/", 1)[0]
        if "Copyright" in head:
            header_lines = [head]

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in header_lines:
        line = line.replace("\t", " ")
        for markup in ("/* ", " * ", "** ", "# "):
            line = line.replace(markup, "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
|
||||
|
||||
# Directories to scan come from the command line; with none given, scan
# the current directory.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only sees changes made to this list object,
        # so rebinding the name would not affect the traversal order.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                continue

            # Read the bytes once and close the file before decoding.
            with open(path, 'rb') as source_file:
                raw = source_file.read()
            try:
                content = raw.decode('utf-8')
            except UnicodeDecodeError:
                # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
                content = raw.decode('iso-8859-1')

            lines = content.split("\n")

            # Files this short are skipped without a warning.
            if len(lines) <= 4:
                continue

            if IsAutoGenerated(content):
                continue

            if "Copyright" not in content:
                # Public domain files legitimately carry no copyright notice;
                # anything else without one is worth a warning.
                if "public domain" not in content.lower():
                    sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            # Collect every copyright header in the file. ExtractCopyrightAt
            # returns the index of the line that ended the header, so the
            # scan resumes on the line after it.
            i = 0
            while i < len(lines):
                if "Copyright" in lines[i]:
                    i = ExtractCopyrightAt(lines, i)
                i += 1

# Write the notices as UTF-8 bytes, in sorted order so that the output is
# the same from run to run. Use the underlying byte stream when stdout is a
# text stream that exposes one, otherwise write to stdout directly.
output = getattr(sys.stdout, "buffer", sys.stdout)
for notice in sorted(copyrights):
    output.write(notice.encode('utf-8'))
    output.write(b"\n")
    output.write(b"\n")
    output.write(b"-------------------------------------------------------------------\n")
    output.write(b"\n")

sys.exit(0)
|
Reference in New Issue
Block a user