Merge "Change ZoneCompactor to generate the single "tzdata" file."

This commit is contained in:
Elliott Hughes 2012-10-19 17:12:37 -07:00 committed by Gerrit Code Review
commit dc595d8308
6 changed files with 268 additions and 320 deletions

View File

@ -7,8 +7,7 @@ import libcore.util.ZoneInfo;
// usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version> // usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>
// //
// Compile a set of tzfile-formatted files into a single file plus // Compile a set of tzfile-formatted files into a single file containing an index.
// an index file.
// //
// The compilation is controlled by a setup file, which is provided as a // The compilation is controlled by a setup file, which is provided as a
// command-line argument. The setup file has the form: // command-line argument. The setup file has the form:
@ -18,195 +17,219 @@ import libcore.util.ZoneInfo;
// <zone filename> // <zone filename>
// ... // ...
// //
// Note that the links must be declared prior to the zone names. A // Note that the links must be declared prior to the zone names.
// zone name is a filename relative to the source directory such as // A zone name is a filename relative to the source directory such as
// 'GMT', 'Africa/Dakar', or 'America/Argentina/Jujuy'. // 'GMT', 'Africa/Dakar', or 'America/Argentina/Jujuy'.
// //
// Use the 'zic' command-line tool to convert from flat files // Use the 'zic' command-line tool to convert from flat files
// (e.g., 'africa', 'northamerica') into a suitable source directory // (such as 'africa' or 'northamerica') to a directory
// hierarchy for this tool (e.g., 'data/Africa/Abidjan'). // hierarchy suitable for this tool (containing files such as 'data/Africa/Abidjan').
// //
// Example:
// zic -d data tz2007h
// javac ZoneCompactor.java
// java ZoneCompactor setup data
// <produces zoneinfo.dat and zoneinfo.idx>
public class ZoneCompactor { public class ZoneCompactor {
public static class ByteArrayBufferIteratorBE extends BufferIterator { public static class ByteArrayBufferIteratorBE extends BufferIterator {
private final byte[] bytes; private final byte[] bytes;
private int offset = 0; private int offset = 0;
public ByteArrayBufferIteratorBE(byte[] bytes) { public ByteArrayBufferIteratorBE(byte[] bytes) {
this.bytes = bytes; this.bytes = bytes;
this.offset = 0; this.offset = 0;
}
// Moves the read position to the absolute index 'offset' in the backing array.
// No range check is done here; an out-of-range position only fails on a later read.
public void seek(int offset) {
this.offset = offset;
}
// Advances the read position by 'byteCount' bytes without reading them.
// No range check is done here.
public void skip(int byteCount) {
this.offset += byteCount;
}
// Copies 'byteCount' bytes from the current read position into 'dst', starting at
// 'dstOffset', then advances the read position past the copied bytes.
public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
System.arraycopy(bytes, offset, dst, dstOffset, byteCount);
offset += byteCount;
}
// Returns the byte at the current read position and advances the position by one.
public byte readByte() {
return bytes[offset++];
}
// Reads four bytes via readByte(), most significant byte first, and combines them
// into a single int.
public int readInt() {
    int value = 0;
    for (int i = 0; i < 4; ++i) {
        // Mask to 0..255 so a negative byte does not sign-extend into the higher bits.
        value = (value << 8) | (readByte() & 0xff);
    }
    return value;
}
// Stores 'intCount' successive readInt() results into 'dst', beginning at index 'dstOffset'.
public void readIntArray(int[] dst, int dstOffset, int intCount) {
    int index = dstOffset;
    int remaining = intCount;
    while (remaining-- > 0) {
        dst[index++] = readInt();
    }
}
// Not implemented by this iterator: always throws UnsupportedOperationException.
public short readShort() {
throw new UnsupportedOperationException();
}
} }
// Maximum number of characters in a zone name, including '\0' terminator public void seek(int offset) {
private static final int MAXNAME = 40; this.offset = offset;
// Zone name synonyms
private Map<String,String> links = new HashMap<String,String>();
// File starting bytes by zone name
private Map<String,Integer> starts = new HashMap<String,Integer>();
// File lengths by zone name
private Map<String,Integer> lengths = new HashMap<String,Integer>();
// Raw GMT offsets by zone name
private Map<String,Integer> offsets = new HashMap<String,Integer>();
private int start = 0;
// Concatenate the contents of 'inFile' onto 'out'
// and return the contents as a byte array.
private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
byte[] ret = new byte[0];
InputStream in = new FileInputStream(inFile);
byte[] buf = new byte[8192];
while (true) {
int nbytes = in.read(buf);
if (nbytes == -1) {
break;
}
out.write(buf, 0, nbytes);
byte[] nret = new byte[ret.length + nbytes];
System.arraycopy(ret, 0, nret, 0, ret.length);
System.arraycopy(buf, 0, nret, ret.length, nbytes);
ret = nret;
}
out.flush();
return ret;
} }
// Write a 32-bit integer in network byte order public void skip(int byteCount) {
private void writeInt(OutputStream os, int x) throws IOException { this.offset += byteCount;
os.write((x >> 24) & 0xff);
os.write((x >> 16) & 0xff);
os.write((x >> 8) & 0xff);
os.write( x & 0xff);
} }
public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception { public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
File zoneInfoFile = new File(outputDirectory, "zoneinfo.dat"); System.arraycopy(bytes, offset, dst, dstOffset, byteCount);
zoneInfoFile.delete(); offset += byteCount;
OutputStream zoneInfo = new FileOutputStream(zoneInfoFile);
BufferedReader rdr = new BufferedReader(new FileReader(setupFile));
String s;
while ((s = rdr.readLine()) != null) {
s = s.trim();
if (s.startsWith("Link")) {
StringTokenizer st = new StringTokenizer(s);
st.nextToken();
String to = st.nextToken();
String from = st.nextToken();
links.put(from, to);
} else {
String link = links.get(s);
if (link == null) {
File f = new File(dataDirectory, s);
long length = f.length();
starts.put(s, new Integer(start));
lengths.put(s, new Integer((int)length));
start += length;
byte[] data = copyFile(f, zoneInfo);
BufferIterator it = new ByteArrayBufferIteratorBE(data);
TimeZone tz = ZoneInfo.makeTimeZone(s, it);
int gmtOffset = tz.getRawOffset();
offsets.put(s, new Integer(gmtOffset));
}
}
}
zoneInfo.close();
// Fill in fields for links
Iterator<String> iter = links.keySet().iterator();
while (iter.hasNext()) {
String from = iter.next();
String to = links.get(from);
starts.put(from, starts.get(to));
lengths.put(from, lengths.get(to));
offsets.put(from, offsets.get(to));
}
File idxFile = new File(outputDirectory, "zoneinfo.idx");
idxFile.delete();
FileOutputStream idx = new FileOutputStream(idxFile);
ArrayList<String> l = new ArrayList<String>();
l.addAll(starts.keySet());
Collections.sort(l);
Iterator<String> ziter = l.iterator();
while (ziter.hasNext()) {
String zname = ziter.next();
if (zname.length() >= MAXNAME) {
System.err.println("Error - zone filename exceeds " +
(MAXNAME - 1) + " characters!");
}
byte[] znameBuf = new byte[MAXNAME];
for (int i = 0; i < zname.length(); i++) {
znameBuf[i] = (byte)zname.charAt(i);
}
idx.write(znameBuf);
writeInt(idx, starts.get(zname).intValue());
writeInt(idx, lengths.get(zname).intValue());
writeInt(idx, offsets.get(zname).intValue());
}
idx.close();
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(outputDirectory, "zoneinfo.version")), "US-ASCII");
writer.write(version);
writer.write('\n');
writer.close();
// System.out.println("maxLength = " + maxLength);
} }
public static void main(String[] args) throws Exception { public byte readByte() {
if (args.length != 4) { return bytes[offset++];
System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
System.exit(0);
}
new ZoneCompactor(args[0], args[1], args[2], args[3]);
} }
// Reads the next four bytes with readByte() and assembles them into an int,
// treating the first byte read as the most significant.
public int readInt() {
    // Each byte is masked to 0..255 so negative bytes do not sign-extend.
    final int b3 = readByte() & 0xff;
    final int b2 = readByte() & 0xff;
    final int b1 = readByte() & 0xff;
    final int b0 = readByte() & 0xff;
    return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
}
// Fills 'dst' with 'intCount' values obtained from readInt(), writing the first
// one at index 'dstOffset' and the rest at the following indices.
public void readIntArray(int[] dst, int dstOffset, int intCount) {
    for (int remaining = intCount, index = dstOffset; remaining > 0; --remaining, ++index) {
        dst[index] = readInt();
    }
}
// Not implemented by this iterator: always throws UnsupportedOperationException.
public short readShort() {
throw new UnsupportedOperationException();
}
}
// Maximum number of characters in a zone name, including '\0' terminator.
// This is also the size in bytes of the name field of each index entry.
private static final int MAXNAME = 40;
// Zone name synonyms: maps each link (alias) name to the zone name it refers to.
private Map<String,String> links = new HashMap<String,String>();
// File starting bytes by zone name, counted from the start of the concatenated data.
private Map<String,Integer> starts = new HashMap<String,Integer>();
// File lengths by zone name
private Map<String,Integer> lengths = new HashMap<String,Integer>();
// Raw GMT offsets by zone name, as reported by TimeZone.getRawOffset().
private Map<String,Integer> offsets = new HashMap<String,Integer>();
// Running total of the lengths of the zone files processed so far, i.e. the
// position at which the next zone file's data will start.
private int start = 0;
// Appends the entire contents of 'inFile' to 'out' and returns those same
// bytes as a newly allocated array.
//
// The input stream is always closed, even when reading or writing fails.
// 'out' is flushed but left open, because the caller owns it.
//
// Throws whatever the underlying streams throw (for example when 'inFile'
// cannot be opened).
private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
    InputStream in = new FileInputStream(inFile);
    try {
        // Collect the bytes in a growable buffer instead of reallocating and
        // recopying the whole result for every chunk that is read.
        ByteArrayOutputStream contents = new ByteArrayOutputStream();
        byte[] buf = new byte[8192];
        int nbytes;
        while ((nbytes = in.read(buf)) != -1) {
            out.write(buf, 0, nbytes);
            contents.write(buf, 0, nbytes);
        }
        out.flush();
        return contents.toByteArray();
    } finally {
        in.close();
    }
}
// Builds the single "tzdata" file in 'outputDirectory' from the zone files
// named in 'setupFile'.
//
//   setupFile       -- "Link <to> <from>" lines followed by zone file names.
//   dataDirectory   -- directory the zone file names are relative to.
//   outputDirectory -- directory in which "tzdata" is created or truncated.
//   version         -- stored in the 12-byte header field; must be at most 11
//                      characters so the field keeps a '\0' terminator.
//
// Throws RuntimeException if the version or a zone name is too long, if a
// name is not ASCII, or if a link refers to a zone that was never read.
// I/O failures propagate from the underlying streams. The setup reader and
// the output file are closed on every path.
public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception {
    // Validate the version before touching any file, so that a bad argument
    // cannot leave a truncated or unterminated "tzdata" behind.
    final int versionFieldSize = 12;
    if (version.length() >= versionFieldSize) {
        throw new RuntimeException("tzdata version too long: " + version);
    }

    // Read the setup file, and concatenate all the data.
    ByteArrayOutputStream allData = new ByteArrayOutputStream();
    BufferedReader reader = new BufferedReader(new FileReader(setupFile));
    try {
        String s;
        while ((s = reader.readLine()) != null) {
            s = s.trim();
            if (s.length() == 0) {
                // A blank line names neither a link nor a zone file.
                continue;
            }
            if (s.startsWith("Link")) {
                StringTokenizer st = new StringTokenizer(s);
                st.nextToken();
                String to = st.nextToken();
                String from = st.nextToken();
                links.put(from, to);
            } else if (links.get(s) == null) {
                // Not an alias, so this line names a real zone file.
                File sourceFile = new File(dataDirectory, s);
                byte[] data = copyFile(sourceFile, allData);
                // Record the number of bytes actually appended, so the index
                // always agrees with the concatenated data.
                starts.put(s, start);
                lengths.put(s, data.length);
                start += data.length;
                BufferIterator it = new ByteArrayBufferIteratorBE(data);
                TimeZone tz = ZoneInfo.makeTimeZone(s, it);
                offsets.put(s, tz.getRawOffset());
            }
        }
    } finally {
        reader.close();
    }

    // Fill in fields for links: an alias shares its target's index values.
    for (Map.Entry<String, String> link : links.entrySet()) {
        String from = link.getKey();
        String to = link.getValue();
        if (!starts.containsKey(to)) {
            // Fail here with a clear message rather than with a
            // NullPointerException while writing the index.
            throw new RuntimeException("link " + from + " refers to unknown zone " + to);
        }
        starts.put(from, starts.get(to));
        lengths.put(from, lengths.get(to));
        offsets.put(from, offsets.get(to));
    }

    // Create/truncate the destination file.
    RandomAccessFile f = new RandomAccessFile(new File(outputDirectory, "tzdata"), "rw");
    try {
        f.setLength(0);

        // Write the header.
        // byte[12] tzdata_version -- 'tzdata2012f\0'
        // int file_format_version -- probably won't need this, but just in case
        // int index_offset -- likewise
        // int data_offset
        // int zonetab_offset

        // tzdata_version
        f.write(toAscii(new byte[versionFieldSize], version));

        // file_format_version
        f.writeInt(1);

        // Write dummy values for the three offsets, and remember where we need to seek back to later
        // when we have the real values.
        int index_offset_offset = (int) f.getFilePointer();
        f.writeInt(0);
        int data_offset_offset = (int) f.getFilePointer();
        f.writeInt(0);
        int zonetab_offset_offset = (int) f.getFilePointer();
        f.writeInt(0);

        int index_offset = (int) f.getFilePointer();

        // Write the index: one fixed-size entry per zone name, in sorted order.
        ArrayList<String> sortedOlsonIds = new ArrayList<String>(starts.keySet());
        Collections.sort(sortedOlsonIds);
        for (String zoneName : sortedOlsonIds) {
            if (zoneName.length() >= MAXNAME) {
                throw new RuntimeException("zone filename too long: " + zoneName.length());
            }
            f.write(toAscii(new byte[MAXNAME], zoneName));
            f.writeInt(starts.get(zoneName));
            f.writeInt(lengths.get(zoneName));
            f.writeInt(offsets.get(zoneName));
        }

        int data_offset = (int) f.getFilePointer();

        // Write the data.
        f.write(allData.toByteArray());

        // TODO: append the zonetab.
        int zonetab_offset = 0;

        // Go back and fix up the offsets in the header.
        f.seek(index_offset_offset);
        f.writeInt(index_offset);
        f.seek(data_offset_offset);
        f.writeInt(data_offset);
        f.seek(zonetab_offset_offset);
        f.writeInt(zonetab_offset);
    } finally {
        f.close();
    }
}
// Copies 'src' into the start of 'dst', one byte per character, and returns 'dst'.
// Bytes of 'dst' beyond src.length() are left untouched, so passing a freshly
// allocated array yields a zero-padded field.
//
// Throws RuntimeException if 'src' has more characters than 'dst' has bytes,
// or if 'src' contains a character above '~'.
private static byte[] toAscii(byte[] dst, String src) {
    // Report an overlong string by name instead of failing part-way through
    // the copy with a bare ArrayIndexOutOfBoundsException.
    if (src.length() > dst.length) {
        throw new RuntimeException("string too long for " + dst.length + "-byte field: " + src);
    }
    for (int i = 0; i < src.length(); ++i) {
        char c = src.charAt(i);
        if (c > '~') {
            throw new RuntimeException("non-ASCII string: " + src);
        }
        dst[i] = (byte) c;
    }
    return dst;
}
// Command-line entry point. Expects exactly four arguments, in the order given
// by the usage message, and passes them to the ZoneCompactor constructor.
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
        // A usage error is a failure: exit non-zero so that calling scripts
        // which check the exit status notice that nothing was generated.
        System.exit(1);
    }
    new ZoneCompactor(args[0], args[1], args[2], args[3]);
}
} }

View File

@ -1,12 +1,10 @@
#!/usr/bin/python #!/usr/bin/python
# Run with no arguments from any directory, with no special setup required.
"""Updates the tzdata file."""
import ftplib import ftplib
import hashlib
import os import os
import re import re
import shutil
import string
import subprocess import subprocess
import sys import sys
import tarfile import tarfile
@ -18,41 +16,58 @@ bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir) bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
bionic_dir = os.path.dirname(bionic_libc_dir) bionic_dir = os.path.dirname(bionic_libc_dir)
bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir):
print "Couldn't find bionic/libc/tools/zoneinfo!" print "Couldn't find bionic/libc/tools/zoneinfo!"
sys.exit(1) sys.exit(1)
if not os.path.isdir(bionic_libc_zoneinfo_dir):
print "Couldn't find bionic/libc/zoneinfo!"
sys.exit(1)
print 'Found bionic in %s...' % bionic_dir print 'Found bionic in %s...' % bionic_dir
regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward', 'etcetera', 'europe', 'northamerica', 'southamerica'] regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward',
'etcetera', 'europe', 'northamerica', 'southamerica']
def current_tzdata_version(): def GetCurrentTzDataVersion():
return open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir).readline().rstrip('\n') return open('%s/tzdata' % bionic_libc_zoneinfo_dir).read().split('\0', 1)[0]
def md5_file(filename): def WriteSetupFile():
md5 = hashlib.md5() links = []
f = open(filename, 'rb') zones = []
while True: for region in regions:
data = f.read(8192) for line in open('extracted/%s' % region):
if not data: fields = line.split()
break if len(fields) == 0:
md5.update(data) continue
return md5.hexdigest() elif fields[0] == 'Link':
links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
zones.append(fields[2])
elif fields[0] == 'Zone':
zones.append(fields[1])
zones.sort()
setup = open('setup', 'w')
for link in links:
setup.write(link)
for zone in zones:
setup.write('%s\n' % zone)
setup.close()
def upgrade_to(ftp, filename): def UpgradeTo(ftp, filename):
version = re.search('tzdata(.+)\.tar\.gz', filename).group(1) new_version = re.search('(tzdata.+)\.tar\.gz', filename).group(1)
# Switch to a temporary directory. # Switch to a temporary directory.
tmp_dir = tempfile.mkdtemp('-tzdata') tmp_dir = tempfile.mkdtemp('-tzdata')
os.chdir(tmp_dir) os.chdir(tmp_dir)
print 'Created temporary directory "%s"...' % tmp_dir print 'Created temporary directory "%s"...' % tmp_dir
print 'Downloading %s...' % filename print 'Downloading...'
ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write) ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
print 'MD5: %s' % md5_file(filename)
print 'Extracting...' print 'Extracting...'
os.mkdir('extracted') os.mkdir('extracted')
@ -65,58 +80,46 @@ def upgrade_to(ftp, filename):
if region != 'backward': if region != 'backward':
subprocess.check_call(['zic', '-d', 'data', 'extracted/%s' % region]) subprocess.check_call(['zic', '-d', 'data', 'extracted/%s' % region])
# Collect the data ZoneCompactor needs. WriteSetupFile()
links = []
zones = []
for region in regions:
for line in open('extracted/%s' % region).readlines():
fields = string.split(line)
if len(fields) == 0:
continue
elif fields[0] == 'Link':
links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
zones.append(fields[2])
elif fields[0] == 'Zone':
zones.append(fields[1])
zones.sort()
# Write it into the "setup" file. print 'Calling ZoneCompactor to update bionic to %s...' % new_version
setup = open('setup', 'w')
for link in links:
setup.write(link)
for zone in zones:
setup.write('%s\n' % zone)
setup.close()
print 'Calling ZoneCompactor to update bionic from %s to %s...' % (current_tzdata_version(), version)
libcore_src_dir = '%s/../libcore/luni/src/main/java/' % bionic_dir libcore_src_dir = '%s/../libcore/luni/src/main/java/' % bionic_dir
subprocess.check_call(['javac', '-d', '.', subprocess.check_call(['javac', '-d', '.',
'%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir, '%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir,
'%s/libcore/util/ZoneInfo.java' % libcore_src_dir, '%s/libcore/util/ZoneInfo.java' % libcore_src_dir,
'%s/libcore/io/BufferIterator.java' % libcore_src_dir]) '%s/libcore/io/BufferIterator.java' % libcore_src_dir])
subprocess.check_call(['java', 'ZoneCompactor', 'setup', 'data', bionic_libc_zoneinfo_dir, version]) subprocess.check_call(['java', 'ZoneCompactor',
'setup', 'data', bionic_libc_zoneinfo_dir, new_version])
# URL from "Sources for Time Zone and Daylight Saving Time Data" # Run with no arguments from any directory, with no special setup required.
# http://www.twinsun.com/tz/tz-link.htm def main():
# URL from "Sources for Time Zone and Daylight Saving Time Data"
# http://www.twinsun.com/tz/tz-link.htm
print 'Looking for new tzdata...' print 'Looking for new tzdata...'
ftp = ftplib.FTP('ftp.iana.org') ftp = ftplib.FTP('ftp.iana.org')
ftp.login() ftp.login()
ftp.cwd('tz/releases') ftp.cwd('tz/releases')
tzdata_filenames = [] tzdata_filenames = []
for filename in ftp.nlst(): for filename in ftp.nlst():
if filename.startswith('tzdata20'): if filename.startswith('tzdata20'):
tzdata_filenames.append(filename) tzdata_filenames.append(filename)
tzdata_filenames.sort() tzdata_filenames.sort()
# If you're several releases behind, we'll walk you through the upgrades one by one. # If you're several releases behind, we'll walk you through the upgrades
current_version = current_tzdata_version() # one by one.
current_filename = 'tzdata%s.tar.gz' % current_version current_version = GetCurrentTzDataVersion()
for filename in tzdata_filenames: current_filename = '%s.tar.gz' % current_version
if filename > current_filename: for filename in tzdata_filenames:
upgrade_to(ftp, filename) if filename > current_filename:
sys.exit(0) print 'Found new tzdata: %s' % filename
UpgradeTo(ftp, filename)
sys.exit(0)
print 'You already have the latest tzdata (%s)!' % current_version print 'You already have the latest tzdata (%s)!' % current_version
sys.exit(0) sys.exit(0)
if __name__ == '__main__':
main()

View File

@ -1,77 +0,0 @@
#!/usr/bin/python
# Run with no arguments from any directory, with no special setup required.
import ftplib
import hashlib
import os
import re
import shutil
import string
import struct
import subprocess
import sys
import tarfile
import tempfile
# Find the bionic directory, searching upward from this script.
bionic_libc_tools_zoneinfo_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
bionic_dir = os.path.dirname(bionic_libc_dir)
bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
print "Couldn't find bionic/libc/tools/zoneinfo!"
sys.exit(1)
def current_tzdata_version():
  """Returns the first line of libc/zoneinfo/zoneinfo.version, without its trailing newline."""
  # Use a with-block so the file handle is closed promptly instead of being
  # left for the garbage collector.
  with open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir) as version_file:
    return version_file.readline().rstrip('\n')
# TODO: make the regular "generate" script just output this format directly.

# Combines zoneinfo.idx and zoneinfo.dat into a single 'tzdata' file:
#
# -- header
# char[12] tzdata_version -- 'tzdata2012f\0'
# u32 file_format_version -- probably won't need this, but just in case
# u32 index_offset -- likewise
# u32 data_offset
# u32 zonetab_offset
# -- index (@index_offset)
# u8* index_bytes
# -- data (@data_offset)
# u8* data_bytes
# TODO: zonetab
# -- zonetab (@zonetab_offset)
# u8* zonetab_bytes
header_format = "! 12s i i i i"
header_size = struct.calcsize(header_format)

# Read both inputs before touching the output, so a missing input cannot leave
# a truncated tzdata file behind. Both are binary; open them in binary mode so
# the lengths used for the offsets are exact byte counts.
with open('%s/zoneinfo.idx' % bionic_libc_zoneinfo_dir, 'rb') as index_file:
  index_bytes = index_file.read()
with open('%s/zoneinfo.dat' % bionic_libc_zoneinfo_dir, 'rb') as data_file:
  data_bytes = data_file.read()

index_offset = header_size
index_size = len(index_bytes)

data_offset = index_offset + index_size
data_size = len(data_bytes)

zonetab_offset = 0 # TODO: data_offset + data_size

tzdata_version = current_tzdata_version()
file_format_version = 1

header = struct.pack(header_format, 'tzdata%s' % tzdata_version, file_format_version, index_offset, data_offset, zonetab_offset)

# Open the output file and write the three sections in order.
with open('%s/tzdata' % bionic_libc_zoneinfo_dir, 'wb+') as f:
  f.write(header)
  f.write(index_bytes)
  f.write(data_bytes)

sys.exit(0)

Binary file not shown.

Binary file not shown.

View File

@ -1 +0,0 @@
2012g