Merge "Change ZoneCompactor to generate the single "tzdata" file."

This commit is contained in:
Elliott Hughes 2012-10-19 17:12:37 -07:00 committed by Gerrit Code Review
commit dc595d8308
6 changed files with 268 additions and 320 deletions

View File

@ -7,8 +7,7 @@ import libcore.util.ZoneInfo;
// usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>
//
// Compile a set of tzfile-formatted files into a single file plus
// an index file.
// Compile a set of tzfile-formatted files into a single file containing an index.
//
// The compilation is controlled by a setup file, which is provided as a
// command-line argument. The setup file has the form:
@ -18,195 +17,219 @@ import libcore.util.ZoneInfo;
// <zone filename>
// ...
//
// Note that the links must be declared prior to the zone names. A
// zone name is a filename relative to the source directory such as
// Note that the links must be declared prior to the zone names.
// A zone name is a filename relative to the source directory such as
// 'GMT', 'Africa/Dakar', or 'America/Argentina/Jujuy'.
//
// Use the 'zic' command-line tool to convert from flat files
// (e.g., 'africa', 'northamerica') into a suitable source directory
// hierarchy for this tool (e.g., 'data/Africa/Abidjan').
// (such as 'africa' or 'northamerica') to a directory
// hierarchy suitable for this tool (containing files such as 'data/Africa/Abidjan').
//
// Example:
// zic -d data tz2007h
// javac ZoneCompactor.java
// java ZoneCompactor setup data <output directory> <tzdata version>
// <produces a single 'tzdata' file in the output directory>
public class ZoneCompactor {
public static class ByteArrayBufferIteratorBE extends BufferIterator {
private final byte[] bytes;
private int offset = 0;
public static class ByteArrayBufferIteratorBE extends BufferIterator {
private final byte[] bytes;
private int offset = 0;
public ByteArrayBufferIteratorBE(byte[] bytes) {
this.bytes = bytes;
this.offset = 0;
}
public void seek(int offset) {
this.offset = offset;
}
public void skip(int byteCount) {
this.offset += byteCount;
}
public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
System.arraycopy(bytes, offset, dst, dstOffset, byteCount);
offset += byteCount;
}
public byte readByte() {
return bytes[offset++];
}
public int readInt() {
return ((readByte() & 0xff) << 24) | ((readByte() & 0xff) << 16) | ((readByte() & 0xff) << 8) | (readByte() & 0xff);
}
public void readIntArray(int[] dst, int dstOffset, int intCount) {
for (int i = 0; i < intCount; ++i) {
dst[dstOffset++] = readInt();
}
}
public short readShort() {
throw new UnsupportedOperationException();
}
// Wraps 'bytes' (the array is referenced, not copied) and starts reading at index 0.
public ByteArrayBufferIteratorBE(byte[] bytes) {
  offset = 0;
  this.bytes = bytes;
}
// Maximum number of characters in a zone name, including '\0' terminator
private static final int MAXNAME = 40;
// Zone name synonyms
private Map<String,String> links = new HashMap<String,String>();
// File starting bytes by zone name
private Map<String,Integer> starts = new HashMap<String,Integer>();
// File lengths by zone name
private Map<String,Integer> lengths = new HashMap<String,Integer>();
// Raw GMT offsets by zone name
private Map<String,Integer> offsets = new HashMap<String,Integer>();
private int start = 0;
// Concatenate the contents of 'inFile' onto 'out'
// and return the contents as a byte array.
private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
byte[] ret = new byte[0];
InputStream in = new FileInputStream(inFile);
byte[] buf = new byte[8192];
while (true) {
int nbytes = in.read(buf);
if (nbytes == -1) {
break;
}
out.write(buf, 0, nbytes);
byte[] nret = new byte[ret.length + nbytes];
System.arraycopy(ret, 0, nret, 0, ret.length);
System.arraycopy(buf, 0, nret, ret.length, nbytes);
ret = nret;
}
out.flush();
return ret;
// Sets the read position to 'offset'. The value is stored as-is; nothing is
// validated here, so an out-of-range position only shows up on the next read.
public void seek(int offset) {
this.offset = offset;
}
// Write a 32-bit integer in network byte order
private void writeInt(OutputStream os, int x) throws IOException {
os.write((x >> 24) & 0xff);
os.write((x >> 16) & 0xff);
os.write((x >> 8) & 0xff);
os.write( x & 0xff);
// Advances the read position by 'byteCount' without reading anything.
public void skip(int byteCount) {
  offset = offset + byteCount;
}
public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception {
File zoneInfoFile = new File(outputDirectory, "zoneinfo.dat");
zoneInfoFile.delete();
OutputStream zoneInfo = new FileOutputStream(zoneInfoFile);
BufferedReader rdr = new BufferedReader(new FileReader(setupFile));
String s;
while ((s = rdr.readLine()) != null) {
s = s.trim();
if (s.startsWith("Link")) {
StringTokenizer st = new StringTokenizer(s);
st.nextToken();
String to = st.nextToken();
String from = st.nextToken();
links.put(from, to);
} else {
String link = links.get(s);
if (link == null) {
File f = new File(dataDirectory, s);
long length = f.length();
starts.put(s, new Integer(start));
lengths.put(s, new Integer((int)length));
start += length;
byte[] data = copyFile(f, zoneInfo);
BufferIterator it = new ByteArrayBufferIteratorBE(data);
TimeZone tz = ZoneInfo.makeTimeZone(s, it);
int gmtOffset = tz.getRawOffset();
offsets.put(s, new Integer(gmtOffset));
}
}
}
zoneInfo.close();
// Fill in fields for links
Iterator<String> iter = links.keySet().iterator();
while (iter.hasNext()) {
String from = iter.next();
String to = links.get(from);
starts.put(from, starts.get(to));
lengths.put(from, lengths.get(to));
offsets.put(from, offsets.get(to));
}
File idxFile = new File(outputDirectory, "zoneinfo.idx");
idxFile.delete();
FileOutputStream idx = new FileOutputStream(idxFile);
ArrayList<String> l = new ArrayList<String>();
l.addAll(starts.keySet());
Collections.sort(l);
Iterator<String> ziter = l.iterator();
while (ziter.hasNext()) {
String zname = ziter.next();
if (zname.length() >= MAXNAME) {
System.err.println("Error - zone filename exceeds " +
(MAXNAME - 1) + " characters!");
}
byte[] znameBuf = new byte[MAXNAME];
for (int i = 0; i < zname.length(); i++) {
znameBuf[i] = (byte)zname.charAt(i);
}
idx.write(znameBuf);
writeInt(idx, starts.get(zname).intValue());
writeInt(idx, lengths.get(zname).intValue());
writeInt(idx, offsets.get(zname).intValue());
}
idx.close();
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(outputDirectory, "zoneinfo.version")), "US-ASCII");
writer.write(version);
writer.write('\n');
writer.close();
// System.out.println("maxLength = " + maxLength);
// Copies 'byteCount' bytes from the current position into 'dst' starting at
// 'dstOffset', then moves the position past the bytes that were copied.
public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
  final int from = offset;
  System.arraycopy(bytes, from, dst, dstOffset, byteCount);
  offset = from + byteCount;
}
public static void main(String[] args) throws Exception {
if (args.length != 4) {
System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
System.exit(0);
}
new ZoneCompactor(args[0], args[1], args[2], args[3]);
// Returns the byte at the current position and advances the position by one.
public byte readByte() {
  // Advance first, then index, matching the evaluation order of bytes[offset++].
  final int at = offset++;
  return bytes[at];
}
// Reads four bytes and assembles them into an int, most significant byte first.
public int readInt() {
  final int b3 = readByte() & 0xff;
  final int b2 = readByte() & 0xff;
  final int b1 = readByte() & 0xff;
  final int b0 = readByte() & 0xff;
  return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
}
// Reads 'intCount' ints via readInt() and stores them in consecutive slots of
// 'dst' beginning at 'dstOffset'.
public void readIntArray(int[] dst, int dstOffset, int intCount) {
  for (int remaining = intCount, at = dstOffset; remaining > 0; --remaining, ++at) {
    dst[at] = readInt();
  }
}
// Not implemented by this iterator: always throws UnsupportedOperationException.
public short readShort() {
throw new UnsupportedOperationException();
}
}
// Maximum number of characters in a zone name, including '\0' terminator
private static final int MAXNAME = 40;
// Zone name synonyms
private Map<String,String> links = new HashMap<String,String>();
// File starting bytes by zone name
private Map<String,Integer> starts = new HashMap<String,Integer>();
// File lengths by zone name
private Map<String,Integer> lengths = new HashMap<String,Integer>();
// Raw GMT offsets by zone name
private Map<String,Integer> offsets = new HashMap<String,Integer>();
private int start = 0;
// Appends the contents of 'inFile' to 'out' and returns those same contents
// as a byte array.
//
// The input stream is always closed, even if reading or writing fails. 'out' is
// flushed but left open, because the caller keeps appending to it.
private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
  // Accumulate the returned copy in a growable buffer instead of reallocating
  // and recopying the whole result for every chunk read.
  ByteArrayOutputStream contents = new ByteArrayOutputStream();
  InputStream in = new FileInputStream(inFile);
  try {
    byte[] buf = new byte[8192];
    int nbytes;
    while ((nbytes = in.read(buf)) != -1) {
      out.write(buf, 0, nbytes);
      contents.write(buf, 0, nbytes);
    }
  } finally {
    in.close();
  }
  out.flush();
  return contents.toByteArray();
}
// Builds the single "tzdata" file.
//
// Reads 'setupFile' (all "Link <to> <from>" lines, followed by zone names),
// concatenates the zone files found under 'dataDirectory', and writes a header,
// an index and the concatenated data to <outputDirectory>/tzdata. 'version' is
// stored in the header's 12-byte tzdata_version field.
//
// Both the setup reader and the output file are closed even if an error occurs.
public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception {
  // Read the setup file, and concatenate all the data.
  ByteArrayOutputStream allData = new ByteArrayOutputStream();
  BufferedReader reader = new BufferedReader(new FileReader(setupFile));
  try {
    String s;
    while ((s = reader.readLine()) != null) {
      s = s.trim();
      if (s.length() == 0) {
        // A blank line names no zone; treating it as a filename would try to
        // read the data directory itself.
        continue;
      }
      if (s.startsWith("Link")) {
        StringTokenizer st = new StringTokenizer(s);
        st.nextToken();
        String to = st.nextToken();
        String from = st.nextToken();
        links.put(from, to);
      } else if (links.get(s) == null) {
        // Only real zones carry data; names that are links are filled in below.
        File sourceFile = new File(dataDirectory, s);
        byte[] data = copyFile(sourceFile, allData);
        // Index by the number of bytes actually appended, so the recorded
        // start/length always agree with the data section.
        starts.put(s, start);
        lengths.put(s, data.length);
        start += data.length;

        BufferIterator it = new ByteArrayBufferIteratorBE(data);
        TimeZone tz = ZoneInfo.makeTimeZone(s, it);
        offsets.put(s, tz.getRawOffset());
      }
    }
  } finally {
    reader.close();
  }

  // Fill in fields for links.
  for (Map.Entry<String,String> link : links.entrySet()) {
    String from = link.getKey();
    String to = link.getValue();
    if (!starts.containsKey(to)) {
      // Without this check the missing entry only surfaces later, as a
      // NullPointerException while unboxing during the index write.
      throw new RuntimeException("link " + from + " refers to unknown zone " + to);
    }
    starts.put(from, starts.get(to));
    lengths.put(from, lengths.get(to));
    offsets.put(from, offsets.get(to));
  }

  // Create/truncate the destination file.
  RandomAccessFile f = new RandomAccessFile(new File(outputDirectory, "tzdata"), "rw");
  try {
    f.setLength(0);

    // Write the header.

    // byte[12] tzdata_version -- 'tzdata2012f\0'
    // int file_format_version -- probably won't need this, but just in case
    // int index_offset -- likewise
    // int data_offset
    // int zonetab_offset

    // tzdata_version
    f.write(toAscii(new byte[12], version));

    // file_format_version
    f.writeInt(1);

    // Write dummy values for the three offsets, and remember where we need to seek back to later
    // when we have the real values.
    int index_offset_offset = (int) f.getFilePointer();
    f.writeInt(0);
    int data_offset_offset = (int) f.getFilePointer();
    f.writeInt(0);
    int zonetab_offset_offset = (int) f.getFilePointer();
    f.writeInt(0);

    int index_offset = (int) f.getFilePointer();

    // Write the index: one fixed-size record per name (zones and links), sorted by name.
    ArrayList<String> sortedOlsonIds = new ArrayList<String>(starts.keySet());
    Collections.sort(sortedOlsonIds);
    for (String zoneName : sortedOlsonIds) {
      if (zoneName.length() >= MAXNAME) {
        throw new RuntimeException("zone filename too long: " + zoneName.length());
      }
      f.write(toAscii(new byte[MAXNAME], zoneName));
      f.writeInt(starts.get(zoneName));
      f.writeInt(lengths.get(zoneName));
      f.writeInt(offsets.get(zoneName));
    }

    int data_offset = (int) f.getFilePointer();

    // Write the data.
    f.write(allData.toByteArray());

    // TODO: append the zonetab.
    int zonetab_offset = 0;

    // Go back and fix up the offsets in the header.
    f.seek(index_offset_offset);
    f.writeInt(index_offset);
    f.seek(data_offset_offset);
    f.writeInt(data_offset);
    f.seek(zonetab_offset_offset);
    f.writeInt(zonetab_offset);
  } finally {
    f.close();
  }
}
// Encodes 'src' as ASCII into the start of 'dst' and returns 'dst'.
//
// Bytes of 'dst' past src.length() are left as they were, so passing a freshly
// allocated array yields a zero-padded fixed-width field.
//
// Throws IllegalArgumentException if 'src' does not fit in 'dst', and
// RuntimeException if 'src' contains a character above '~'. Both checks run
// before anything is written, so a rejected string never leaves 'dst'
// partially filled.
private static byte[] toAscii(byte[] dst, String src) {
  final int length = src.length();
  if (length > dst.length) {
    throw new IllegalArgumentException(
        "string too long for " + dst.length + "-byte field: " + src);
  }
  for (int i = 0; i < length; ++i) {
    if (src.charAt(i) > '~') {
      throw new RuntimeException("non-ASCII string: " + src);
    }
  }
  for (int i = 0; i < length; ++i) {
    dst[i] = (byte) src.charAt(i);
  }
  return dst;
}
// Command-line entry point.
//
// Expects exactly four arguments: <setup file> <data directory>
// <output directory> <tzdata version>. All the work happens in the
// ZoneCompactor constructor.
public static void main(String[] args) throws Exception {
  if (args.length != 4) {
    System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
    // A usage error is a failure: exit non-zero so that callers which check the
    // exit status don't mistake this for a successful run.
    System.exit(1);
  }
  new ZoneCompactor(args[0], args[1], args[2], args[3]);
}
}

View File

@ -1,12 +1,10 @@
#!/usr/bin/python
# Run with no arguments from any directory, with no special setup required.
"""Updates the tzdata file."""
import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
@ -18,41 +16,58 @@ bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
bionic_dir = os.path.dirname(bionic_libc_dir)
bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir):
print "Couldn't find bionic/libc/tools/zoneinfo!"
sys.exit(1)
if not os.path.isdir(bionic_libc_zoneinfo_dir):
print "Couldn't find bionic/libc/zoneinfo!"
sys.exit(1)
print 'Found bionic in %s...' % bionic_dir
regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward', 'etcetera', 'europe', 'northamerica', 'southamerica']
regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward',
'etcetera', 'europe', 'northamerica', 'southamerica']
def current_tzdata_version():
return open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir).readline().rstrip('\n')
def GetCurrentTzDataVersion():
  """Returns the version string (e.g. 'tzdata2012f') stored in bionic's tzdata file.

  The version is the NUL-terminated string at the very start of the file.
  """
  # tzdata is a binary file whose header starts with a 12-byte version field,
  # so open it in binary mode and read only that field instead of the whole file.
  with open('%s/tzdata' % bionic_libc_zoneinfo_dir, 'rb') as f:
    return f.read(12).split(b'\0', 1)[0]
def md5_file(filename):
  """Returns the hex MD5 digest of the contents of the file named 'filename'."""
  md5 = hashlib.md5()
  # Hash in 8KiB chunks so the whole file never has to be held in memory; the
  # 'with' block closes the file even if reading fails.
  with open(filename, 'rb') as f:
    for data in iter(lambda: f.read(8192), b''):
      md5.update(data)
  return md5.hexdigest()
def WriteSetupFile():
  """Writes the 'setup' file that ZoneCompactor reads.

  Scans 'extracted/<region>' (relative to the current directory) for every
  entry in 'regions', collecting 'Link' and 'Zone' lines. The output lists all
  the links first, in the order found, followed by the sorted zone names, one
  per line. A link's own name is listed among the zone names too.
  """
  links = []
  zones = []
  for region in regions:
    # 'with' closes each region file as soon as it has been scanned.
    with open('extracted/%s' % region) as region_file:
      for line in region_file:
        fields = line.split()
        if not fields:
          continue
        if fields[0] == 'Link':
          links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
          zones.append(fields[2])
        elif fields[0] == 'Zone':
          zones.append(fields[1])
  zones.sort()

  # Links must come before the zone names in the setup file.
  with open('setup', 'w') as setup:
    for link in links:
      setup.write(link)
    for zone in zones:
      setup.write('%s\n' % zone)
def upgrade_to(ftp, filename):
version = re.search('tzdata(.+)\.tar\.gz', filename).group(1)
def UpgradeTo(ftp, filename):
new_version = re.search('(tzdata.+)\.tar\.gz', filename).group(1)
# Switch to a temporary directory.
tmp_dir = tempfile.mkdtemp('-tzdata')
os.chdir(tmp_dir)
print 'Created temporary directory "%s"...' % tmp_dir
print 'Downloading %s...' % filename
print 'Downloading...'
ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
print 'MD5: %s' % md5_file(filename)
print 'Extracting...'
os.mkdir('extracted')
@ -65,58 +80,46 @@ def upgrade_to(ftp, filename):
if region != 'backward':
subprocess.check_call(['zic', '-d', 'data', 'extracted/%s' % region])
# Collect the data ZoneCompactor needs.
links = []
zones = []
for region in regions:
for line in open('extracted/%s' % region).readlines():
fields = string.split(line)
if len(fields) == 0:
continue
elif fields[0] == 'Link':
links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
zones.append(fields[2])
elif fields[0] == 'Zone':
zones.append(fields[1])
zones.sort()
WriteSetupFile()
# Write it into the "setup" file.
setup = open('setup', 'w')
for link in links:
setup.write(link)
for zone in zones:
setup.write('%s\n' % zone)
setup.close()
print 'Calling ZoneCompactor to update bionic from %s to %s...' % (current_tzdata_version(), version)
print 'Calling ZoneCompactor to update bionic to %s...' % new_version
libcore_src_dir = '%s/../libcore/luni/src/main/java/' % bionic_dir
subprocess.check_call(['javac', '-d', '.',
'%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir,
'%s/libcore/util/ZoneInfo.java' % libcore_src_dir,
'%s/libcore/io/BufferIterator.java' % libcore_src_dir])
subprocess.check_call(['java', 'ZoneCompactor', 'setup', 'data', bionic_libc_zoneinfo_dir, version])
subprocess.check_call(['java', 'ZoneCompactor',
'setup', 'data', bionic_libc_zoneinfo_dir, new_version])
# URL from "Sources for Time Zone and Daylight Saving Time Data"
# http://www.twinsun.com/tz/tz-link.htm
# Run with no arguments from any directory, with no special setup required.
def main():
# URL from "Sources for Time Zone and Daylight Saving Time Data"
# http://www.twinsun.com/tz/tz-link.htm
print 'Looking for new tzdata...'
ftp = ftplib.FTP('ftp.iana.org')
ftp.login()
ftp.cwd('tz/releases')
tzdata_filenames = []
for filename in ftp.nlst():
if filename.startswith('tzdata20'):
tzdata_filenames.append(filename)
tzdata_filenames.sort()
print 'Looking for new tzdata...'
ftp = ftplib.FTP('ftp.iana.org')
ftp.login()
ftp.cwd('tz/releases')
tzdata_filenames = []
for filename in ftp.nlst():
if filename.startswith('tzdata20'):
tzdata_filenames.append(filename)
tzdata_filenames.sort()
# If you're several releases behind, we'll walk you through the upgrades one by one.
current_version = current_tzdata_version()
current_filename = 'tzdata%s.tar.gz' % current_version
for filename in tzdata_filenames:
if filename > current_filename:
upgrade_to(ftp, filename)
sys.exit(0)
# If you're several releases behind, we'll walk you through the upgrades
# one by one.
current_version = GetCurrentTzDataVersion()
current_filename = '%s.tar.gz' % current_version
for filename in tzdata_filenames:
if filename > current_filename:
print 'Found new tzdata: %s' % filename
UpgradeTo(ftp, filename)
sys.exit(0)
print 'You already have the latest tzdata (%s)!' % current_version
sys.exit(0)
print 'You already have the latest tzdata (%s)!' % current_version
sys.exit(0)
if __name__ == '__main__':
main()

View File

@ -1,77 +0,0 @@
#!/usr/bin/python
# Run with no arguments from any directory, with no special setup required.
import ftplib
import hashlib
import os
import re
import shutil
import string
import struct
import subprocess
import sys
import tarfile
import tempfile
# Find the bionic directory, searching upward from this script.
bionic_libc_tools_zoneinfo_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
bionic_dir = os.path.dirname(bionic_libc_dir)
bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
print "Couldn't find bionic/libc/tools/zoneinfo!"
sys.exit(1)
def current_tzdata_version():
return open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir).readline().rstrip('\n')
# TODO: make the regular "generate" script just output this format directly.
# Open the output file.
f = open('%s/tzdata' % bionic_libc_zoneinfo_dir, 'wb+')
# -- header
# char[12] tzdata_version -- 'tzdata2012f\0'
# u32 file_format_version -- probably won't need this, but just in case
# u32 index_offset -- likewise
# u32 data_offset
# u32 zonetab_offset
header_format = "! 12s i i i i"
header_size = struct.calcsize(header_format)
index_offset = header_size
index_bytes = open('%s/zoneinfo.idx' % bionic_libc_zoneinfo_dir, "rb").read()
index_size = len(index_bytes)
data_offset = index_offset + index_size
data_bytes = open('%s/zoneinfo.dat' % bionic_libc_zoneinfo_dir).read()
data_size = len(data_bytes)
zonetab_offset = 0 # TODO: data_offset + data_size
tzdata_version = current_tzdata_version()
file_format_version = 1
header = struct.pack(header_format, 'tzdata%s' % tzdata_version, file_format_version, index_offset, data_offset, zonetab_offset)
f.write(header)
# -- index (@index_offset)
# u8* index_bytes
f.write(index_bytes)
# -- data (@data_offset)
# u8* data_bytes
f.write(data_bytes)
# TODO: zonetab
# -- zonetab (@zonetab_offset)
# u8* zonetab_bytes
f.close()
sys.exit(0)

Binary file not shown.

Binary file not shown.

View File

@ -1 +0,0 @@
2012g