Merge "Change ZoneCompactor to generate the single "tzdata" file."

2012-10-19 17:12:37 -07:00
parent 7a85c42b8f 5b1497acdb
commit dc595d8308
6 changed files with 268 additions and 320 deletions
--- a/libc/tools/zoneinfo/ZoneCompactor.java
+++ b/libc/tools/zoneinfo/ZoneCompactor.java
@@ -7,8 +7,7 @@ import libcore.util.ZoneInfo;
 // usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>
 //
-// Compile a set of tzfile-formatted files into a single file plus
+// Compile a set of tzfile-formatted files into a single file containing
 // an index followed by the zone data.
 //
 // The compilation is controlled by a setup file, which is provided as a
 // command-line argument.  The setup file has the form:
@@ -18,195 +17,219 @@ import libcore.util.ZoneInfo;
 // <zone filename>
 // ...
 //
-// Note that the links must be declared prior to the zone names.  A
+// Note that the links must be declared prior to the zone names.
-// zone name is a filename relative to the source directory such as
+// A zone name is a filename relative to the source directory such as
 // 'GMT', 'Africa/Dakar', or 'America/Argentina/Jujuy'.
 //
 // Use the 'zic' command-line tool to convert from flat files
-// (e.g., 'africa', 'northamerica') into a suitable source directory
+// (such as 'africa' or 'northamerica') to a directory
-// hierarchy for this tool (e.g., 'data/Africa/Abidjan').
+// hierarchy suitable for this tool (containing files such as 'data/Africa/Abidjan').
 //
 // Example:
 //     zic -d data tz2007h
 //     javac ZoneCompactor.java
 //     java ZoneCompactor setup data <output directory> <tzdata version>
 //     <produces the single file 'tzdata' in the output directory>
 public class ZoneCompactor {
-    public static class ByteArrayBufferIteratorBE extends BufferIterator {
+  public static class ByteArrayBufferIteratorBE extends BufferIterator {
-        private final byte[] bytes;
+    private final byte[] bytes;
-        private int offset = 0;
+    private int offset = 0;
-        public ByteArrayBufferIteratorBE(byte[] bytes) {
+    public ByteArrayBufferIteratorBE(byte[] bytes) {
-            this.bytes = bytes;
+      this.bytes = bytes;
-            this.offset = 0;
+      this.offset = 0;
        }
        public void seek(int offset) {
            this.offset = offset;
        }
        public void skip(int byteCount) {
            this.offset += byteCount;
        }
        public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
            System.arraycopy(bytes, offset, dst, dstOffset, byteCount);
            offset += byteCount;
        }
        public byte readByte() {
            return bytes[offset++];
        }
        public int readInt() {
            return ((readByte() & 0xff) << 24) | ((readByte() & 0xff) << 16) | ((readByte() & 0xff) << 8) | (readByte() & 0xff);
        }
        public void readIntArray(int[] dst, int dstOffset, int intCount) {
            for (int i = 0; i < intCount; ++i) {
                dst[dstOffset++] = readInt();
            }
        }
        public short readShort() {
            throw new UnsupportedOperationException();
        }
    }
-    // Maximum number of characters in a zone name, including '\0' terminator
+    public void seek(int offset) {
-    private static final int MAXNAME = 40;
+      this.offset = offset;
    // Zone name synonyms
    private Map<String,String> links = new HashMap<String,String>();
    // File starting bytes by zone name
    private Map<String,Integer> starts = new HashMap<String,Integer>();
    // File lengths by zone name
    private Map<String,Integer> lengths = new HashMap<String,Integer>();
    // Raw GMT offsets by zone name
    private Map<String,Integer> offsets = new HashMap<String,Integer>();
    private int start = 0;
    // Concatenate the contents of 'inFile' onto 'out'
    // and return the contents as a byte array.
    private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
        byte[] ret = new byte[0];
        InputStream in = new FileInputStream(inFile);
        byte[] buf = new byte[8192];
        while (true) {
            int nbytes = in.read(buf);
            if (nbytes == -1) {
                break;
            }
            out.write(buf, 0, nbytes);
            byte[] nret = new byte[ret.length + nbytes];
            System.arraycopy(ret, 0, nret, 0, ret.length);
            System.arraycopy(buf, 0, nret, ret.length, nbytes);
            ret = nret;
        }
        out.flush();
        return ret;
    }
-    // Write a 32-bit integer in network byte order
+    public void skip(int byteCount) {
-    private void writeInt(OutputStream os, int x) throws IOException {
+      this.offset += byteCount;
        os.write((x >> 24) & 0xff);
        os.write((x >> 16) & 0xff);
        os.write((x >>  8) & 0xff);
        os.write( x        & 0xff);
    }
-    public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception {
+    public void readByteArray(byte[] dst, int dstOffset, int byteCount) {
-        File zoneInfoFile = new File(outputDirectory, "zoneinfo.dat");
+      System.arraycopy(bytes, offset, dst, dstOffset, byteCount);
-        zoneInfoFile.delete();
+      offset += byteCount;
        OutputStream zoneInfo = new FileOutputStream(zoneInfoFile);
        BufferedReader rdr = new BufferedReader(new FileReader(setupFile));
        String s;
        while ((s = rdr.readLine()) != null) {
            s = s.trim();
            if (s.startsWith("Link")) {
                StringTokenizer st = new StringTokenizer(s);
                st.nextToken();
                String to = st.nextToken();
                String from = st.nextToken();
                links.put(from, to);
            } else {
                String link = links.get(s);
                if (link == null) {
                    File f = new File(dataDirectory, s);
                    long length = f.length();
                    starts.put(s, new Integer(start));
                    lengths.put(s, new Integer((int)length));
                    start += length;
                    byte[] data = copyFile(f, zoneInfo);
                    BufferIterator it = new ByteArrayBufferIteratorBE(data);
                    TimeZone tz = ZoneInfo.makeTimeZone(s, it);
                    int gmtOffset = tz.getRawOffset();
                    offsets.put(s, new Integer(gmtOffset));
                }
            }
        }
        zoneInfo.close();
        // Fill in fields for links
        Iterator<String> iter = links.keySet().iterator();
        while (iter.hasNext()) {
            String from = iter.next();
            String to = links.get(from);
            starts.put(from, starts.get(to));
            lengths.put(from, lengths.get(to));
            offsets.put(from, offsets.get(to));
        }
        File idxFile = new File(outputDirectory, "zoneinfo.idx");
        idxFile.delete();
        FileOutputStream idx = new FileOutputStream(idxFile);
        ArrayList<String> l = new ArrayList<String>();
        l.addAll(starts.keySet());
        Collections.sort(l);
        Iterator<String> ziter = l.iterator();
        while (ziter.hasNext()) {
            String zname = ziter.next();
            if (zname.length() >= MAXNAME) {
                System.err.println("Error - zone filename exceeds " +
                                   (MAXNAME - 1) + " characters!");
            }
            byte[] znameBuf = new byte[MAXNAME];
            for (int i = 0; i < zname.length(); i++) {
                znameBuf[i] = (byte)zname.charAt(i);
            }
            idx.write(znameBuf);
            writeInt(idx, starts.get(zname).intValue());
            writeInt(idx, lengths.get(zname).intValue());
            writeInt(idx, offsets.get(zname).intValue());
        }
        idx.close();
        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(outputDirectory, "zoneinfo.version")), "US-ASCII");
        writer.write(version);
        writer.write('\n');
        writer.close();
        // System.out.println("maxLength = " + maxLength);
    }
-    public static void main(String[] args) throws Exception {
+    public byte readByte() {
-        if (args.length != 4) {
+      return bytes[offset++];
            System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
            System.exit(0);
        }
        new ZoneCompactor(args[0], args[1], args[2], args[3]);
    }
    // Reads the next four bytes and combines them into one int, with the
    // first byte read becoming the most significant byte of the result.
    public int readInt() {
      // Mask each byte to 0..255 so sign extension cannot leak into the other bytes.
      int first = readByte() & 0xff;
      int second = readByte() & 0xff;
      int third = readByte() & 0xff;
      int fourth = readByte() & 0xff;
      return (first << 24) | (second << 16) | (third << 8) | fourth;
    }
    // Reads 'intCount' ints with readInt() and stores them in 'dst' at
    // consecutive indexes starting from 'dstOffset'.
    public void readIntArray(int[] dst, int dstOffset, int intCount) {
      int remaining = intCount;
      while (remaining > 0) {
        dst[dstOffset] = readInt();
        ++dstOffset;
        --remaining;
      }
    }
    // 16-bit reads are not implemented by this iterator: calling this method
    // always throws UnsupportedOperationException.
    public short readShort() {
      throw new UnsupportedOperationException();
    }
  }
  // Size in bytes of the fixed-width name field of each index entry. A zone
  // name must be shorter than this, which leaves room for the '\0' terminator.
  private static final int MAXNAME = 40;
  // Zone name synonyms: maps each link name to the name of the zone it refers to.
  private Map<String,String> links = new HashMap<String,String>();
  // File starting bytes by zone name: the offset of each zone's first byte
  // within the concatenated zone data.
  private Map<String,Integer> starts = new HashMap<String,Integer>();
  // File lengths in bytes by zone name.
  private Map<String,Integer> lengths = new HashMap<String,Integer>();
  // Raw GMT offsets (as returned by TimeZone.getRawOffset) by zone name.
  private Map<String,Integer> offsets = new HashMap<String,Integer>();
  // Running total of the zone data bytes accounted for so far, i.e. the
  // offset at which the next zone file will start.
  private int start = 0;
  // Concatenate the contents of 'inFile' onto 'out'
  // and return the contents as a byte array.
  //
  // 'out' is flushed but left open so the caller can keep appending to it.
  // The input file is always closed, even if reading or writing fails.
  // Throws if 'inFile' cannot be opened or read, or if 'out' cannot be written.
  private static byte[] copyFile(File inFile, OutputStream out) throws Exception {
    // Collects a private copy of everything read; it grows geometrically
    // instead of reallocating and recopying the whole result on every read.
    ByteArrayOutputStream contents = new ByteArrayOutputStream();
    InputStream in = new FileInputStream(inFile);
    try {
      byte[] buf = new byte[8192];
      int nbytes;
      while ((nbytes = in.read(buf)) != -1) {
        out.write(buf, 0, nbytes);
        contents.write(buf, 0, nbytes);
      }
    } finally {
      // Release the file descriptor; this is called once per zone file.
      in.close();
    }
    out.flush();
    return contents.toByteArray();
  }
  // Reads 'setupFile', concatenates the zone files it names from
  // 'dataDirectory', and writes the single "tzdata" file (header, then index,
  // then zone data) into 'outputDirectory'. 'version' is stored in the
  // 12-byte tzdata_version field of the header.
  //
  // Throws if a file cannot be read or written, if a link refers to a zone
  // that has no data, or if a zone name does not fit in an index entry.
  public ZoneCompactor(String setupFile, String dataDirectory, String outputDirectory, String version) throws Exception {
    // Read the setup file, and concatenate all the data.
    ByteArrayOutputStream allData = new ByteArrayOutputStream();
    BufferedReader reader = new BufferedReader(new FileReader(setupFile));
    try {
      String s;
      while ((s = reader.readLine()) != null) {
        s = s.trim();
        if (s.length() == 0) {
          // A blank line names no zone; skip it rather than trying to open
          // the data directory itself as a zone file.
          continue;
        }
        if (s.startsWith("Link")) {
          // "Link <to> <from>": 'from' is a synonym for the zone 'to'.
          StringTokenizer st = new StringTokenizer(s);
          st.nextToken();
          String to = st.nextToken();
          String from = st.nextToken();
          links.put(from, to);
        } else if (links.get(s) == null) {
          // A real zone rather than a previously declared link: append its file.
          byte[] data = copyFile(new File(dataDirectory, s), allData);
          // Record the number of bytes actually copied so that the index
          // always agrees with the data written below.
          starts.put(s, start);
          lengths.put(s, data.length);
          start += data.length;
          BufferIterator it = new ByteArrayBufferIteratorBE(data);
          TimeZone tz = ZoneInfo.makeTimeZone(s, it);
          offsets.put(s, tz.getRawOffset());
        }
      }
    } finally {
      reader.close();
    }

    // Fill in fields for links.
    for (Map.Entry<String,String> link : links.entrySet()) {
      String from = link.getKey();
      String to = link.getValue();
      if (starts.get(to) == null) {
        // Without this check the failure would only surface later, as a
        // NullPointerException while unboxing the index fields.
        throw new RuntimeException("link " + from + " refers to a zone with no data: " + to);
      }
      starts.put(from, starts.get(to));
      lengths.put(from, lengths.get(to));
      offsets.put(from, offsets.get(to));
    }

    // Create/truncate the destination file.
    RandomAccessFile f = new RandomAccessFile(new File(outputDirectory, "tzdata"), "rw");
    try {
      f.setLength(0);

      // Write the header.

      // byte[12] tzdata_version          -- 'tzdata2012f\0'
      // int file_format_version          -- probably won't need this, but just in case
      // int index_offset                 -- likewise
      // int data_offset
      // int zonetab_offset

      // tzdata_version
      f.write(toAscii(new byte[12], version));

      // file_format_version
      f.writeInt(1);

      // Write dummy values for the three offsets, and remember where we need to seek back to later
      // when we have the real values.
      int index_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);
      int data_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);
      int zonetab_offset_offset = (int) f.getFilePointer();
      f.writeInt(0);

      int index_offset = (int) f.getFilePointer();

      // Write the index, one fixed-size entry per zone or link, sorted by name.
      ArrayList<String> sortedOlsonIds = new ArrayList<String>(starts.keySet());
      Collections.sort(sortedOlsonIds);
      for (String zoneName : sortedOlsonIds) {
        if (zoneName.length() >= MAXNAME) {
          throw new RuntimeException("zone filename too long: " + zoneName.length());
        }
        f.write(toAscii(new byte[MAXNAME], zoneName));
        f.writeInt(starts.get(zoneName));
        f.writeInt(lengths.get(zoneName));
        f.writeInt(offsets.get(zoneName));
      }

      int data_offset = (int) f.getFilePointer();

      // Write the data.
      f.write(allData.toByteArray());

      // TODO: append the zonetab.
      int zonetab_offset = 0;

      // Go back and fix up the offsets in the header.
      f.seek(index_offset_offset);
      f.writeInt(index_offset);
      f.seek(data_offset_offset);
      f.writeInt(data_offset);
      f.seek(zonetab_offset_offset);
      f.writeInt(zonetab_offset);
    } finally {
      // Close the output file even when writing fails part-way through.
      f.close();
    }
  }
  // Copies 'src' into the start of 'dst', one byte per character, and
  // returns 'dst'. Bytes of 'dst' beyond the copied characters are left
  // untouched, so a freshly allocated array yields a '\0'-padded field.
  //
  // Throws RuntimeException if 'src' contains a character above '~', or if
  // 'src' would not leave at least one byte of 'dst' free for the '\0'
  // terminator that readers of these fixed-width fields rely on.
  private static byte[] toAscii(byte[] dst, String src) {
    if (src.length() >= dst.length) {
      throw new RuntimeException("string too long for " + dst.length + "-byte field: " + src);
    }
    for (int i = 0; i < src.length(); ++i) {
      if (src.charAt(i) > '~') {
        throw new RuntimeException("non-ASCII string: " + src);
      }
      dst[i] = (byte) src.charAt(i);
    }
    return dst;
  }
  // Command-line entry point. Expects exactly four arguments: the setup
  // file, the data directory, the output directory and the tzdata version.
  // With any other argument count it prints a usage message and exits with
  // a failure status.
  public static void main(String[] args) throws Exception {
    if (args.length != 4) {
      System.err.println("usage: java ZoneCompactor <setup file> <data directory> <output directory> <tzdata version>");
      // A usage error is a failure: exit non-zero so that calling scripts
      // do not mistake it for a successful run.
      System.exit(1);
    }
    new ZoneCompactor(args[0], args[1], args[2], args[3]);
  }
 }
--- a/libc/tools/zoneinfo/generate
+++ b/libc/tools/zoneinfo/generate
@@ -1,12 +1,10 @@
 #!/usr/bin/python
-# Run with no arguments from any directory, with no special setup required.
+
 """Updates the tzdata file."""
 import ftplib
 import hashlib
 import os
 import re
 import shutil
 import string
 import subprocess
 import sys
 import tarfile
@@ -18,41 +16,58 @@ bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
 bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
 bionic_dir = os.path.dirname(bionic_libc_dir)
 bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
-if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
+
 if not os.path.isdir(bionic_libc_tools_zoneinfo_dir):
  print "Couldn't find bionic/libc/tools/zoneinfo!"
  sys.exit(1)
 if not os.path.isdir(bionic_libc_zoneinfo_dir):
  print "Couldn't find bionic/libc/zoneinfo!"
  sys.exit(1)
 print 'Found bionic in %s...' % bionic_dir
-regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward', 'etcetera', 'europe', 'northamerica', 'southamerica']
+regions = ['africa', 'antarctica', 'asia', 'australasia', 'backward',
           'etcetera', 'europe', 'northamerica', 'southamerica']
-def current_tzdata_version():
+def GetCurrentTzDataVersion():
-  return open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir).readline().rstrip('\n')
+  return open('%s/tzdata' % bionic_libc_zoneinfo_dir).read().split('\0', 1)[0]
-def md5_file(filename):
+def WriteSetupFile():
-  md5 = hashlib.md5()
+  links = []
-  f = open(filename, 'rb')
+  zones = []
-  while True:
+  for region in regions:
-    data = f.read(8192)
+    for line in open('extracted/%s' % region):
-    if not data:
+      fields = line.split()
-      break
+      if len(fields) == 0:
-    md5.update(data)
+        continue
-  return md5.hexdigest()
+      elif fields[0] == 'Link':
        links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
        zones.append(fields[2])
      elif fields[0] == 'Zone':
        zones.append(fields[1])
  zones.sort()
  setup = open('setup', 'w')
  for link in links:
    setup.write(link)
  for zone in zones:
    setup.write('%s\n' % zone)
  setup.close()
-def upgrade_to(ftp, filename):
+def UpgradeTo(ftp, filename):
-  version = re.search('tzdata(.+)\.tar\.gz', filename).group(1)
+  new_version = re.search('(tzdata.+)\.tar\.gz', filename).group(1)
  # Switch to a temporary directory.
  tmp_dir = tempfile.mkdtemp('-tzdata')
  os.chdir(tmp_dir)
  print 'Created temporary directory "%s"...' % tmp_dir
-  print 'Downloading %s...' % filename
+  print 'Downloading...'
  ftp.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
  print 'MD5: %s' % md5_file(filename)
  print 'Extracting...'
  os.mkdir('extracted')
@@ -65,58 +80,46 @@ def upgrade_to(ftp, filename):
    if region != 'backward':
      subprocess.check_call(['zic', '-d', 'data', 'extracted/%s' % region])
-  # Collect the data ZoneCompactor needs.
+  WriteSetupFile()
  links = []
  zones = []
  for region in regions:
    for line in open('extracted/%s' % region).readlines():
      fields = string.split(line)
      if len(fields) == 0:
        continue
      elif fields[0] == 'Link':
        links.append('%s %s %s\n' % (fields[0], fields[1], fields[2]))
        zones.append(fields[2])
      elif fields[0] == 'Zone':
        zones.append(fields[1])
  zones.sort()
-  # Write it into the "setup" file.
+  print 'Calling ZoneCompactor to update bionic to %s...' % new_version
  setup = open('setup', 'w')
  for link in links:
    setup.write(link)
  for zone in zones:
    setup.write('%s\n' % zone)
  setup.close()
  print 'Calling ZoneCompactor to update bionic from %s to %s...' % (current_tzdata_version(), version)
  libcore_src_dir = '%s/../libcore/luni/src/main/java/' % bionic_dir
  subprocess.check_call(['javac', '-d', '.',
                         '%s/ZoneCompactor.java' % bionic_libc_tools_zoneinfo_dir,
                         '%s/libcore/util/ZoneInfo.java' % libcore_src_dir,
                         '%s/libcore/io/BufferIterator.java' % libcore_src_dir])
-  subprocess.check_call(['java', 'ZoneCompactor', 'setup', 'data', bionic_libc_zoneinfo_dir, version])
+  subprocess.check_call(['java', 'ZoneCompactor',
                         'setup', 'data', bionic_libc_zoneinfo_dir, new_version])
-# URL from "Sources for Time Zone and Daylight Saving Time Data"
+# Run with no arguments from any directory, with no special setup required.
-# http://www.twinsun.com/tz/tz-link.htm
+def main():
  # URL from "Sources for Time Zone and Daylight Saving Time Data"
  # http://www.twinsun.com/tz/tz-link.htm
-print 'Looking for new tzdata...'
+  print 'Looking for new tzdata...'
-ftp = ftplib.FTP('ftp.iana.org')
+  ftp = ftplib.FTP('ftp.iana.org')
-ftp.login()
+  ftp.login()
-ftp.cwd('tz/releases')
+  ftp.cwd('tz/releases')
-tzdata_filenames = []
+  tzdata_filenames = []
-for filename in ftp.nlst():
+  for filename in ftp.nlst():
-  if filename.startswith('tzdata20'):
+    if filename.startswith('tzdata20'):
-    tzdata_filenames.append(filename)
+      tzdata_filenames.append(filename)
-tzdata_filenames.sort()
+  tzdata_filenames.sort()
-# If you're several releases behind, we'll walk you through the upgrades one by one.
+  # If you're several releases behind, we'll walk you through the upgrades
-current_version = current_tzdata_version()
+  # one by one.
-current_filename = 'tzdata%s.tar.gz' % current_version
+  current_version = GetCurrentTzDataVersion()
-for filename in tzdata_filenames:
+  current_filename = '%s.tar.gz' % current_version
-  if filename > current_filename:
+  for filename in tzdata_filenames:
-    upgrade_to(ftp, filename)
+    if filename > current_filename:
-    sys.exit(0)
+      print 'Found new tzdata: %s' % filename
      UpgradeTo(ftp, filename)
      sys.exit(0)
-print 'You already have the latest tzdata (%s)!' % current_version
+  print 'You already have the latest tzdata (%s)!' % current_version
-sys.exit(0)
+  sys.exit(0)
 if __name__ == '__main__':
  main()
--- a/libc/tools/zoneinfo/generate-single-file
+++ b/libc/tools/zoneinfo/generate-single-file
@@ -1,77 +0,0 @@
 #!/usr/bin/python
 # Run with no arguments from any directory, with no special setup required.
 import ftplib
 import hashlib
 import os
 import re
 import shutil
 import string
 import struct
 import subprocess
 import sys
 import tarfile
 import tempfile
 # Find the bionic directory, searching upward from this script.
 bionic_libc_tools_zoneinfo_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
 bionic_libc_tools_dir = os.path.dirname(bionic_libc_tools_zoneinfo_dir)
 bionic_libc_dir = os.path.dirname(bionic_libc_tools_dir)
 bionic_dir = os.path.dirname(bionic_libc_dir)
 bionic_libc_zoneinfo_dir = '%s/libc/zoneinfo' % bionic_dir
 if not os.path.isdir(bionic_libc_tools_zoneinfo_dir) or not os.path.isdir(bionic_libc_zoneinfo_dir):
  print "Couldn't find bionic/libc/tools/zoneinfo!"
  sys.exit(1)
 def current_tzdata_version():
  return open('%s/zoneinfo.version' % bionic_libc_zoneinfo_dir).readline().rstrip('\n')
 # TODO: make the regular "generate" script just output this format directly.
 # Open the output file.
 f = open('%s/tzdata' % bionic_libc_zoneinfo_dir, 'wb+')
 #  -- header
 # char[12] tzdata_version          -- 'tzdata2012f\0'
 # u32 file_format_version          -- probably won't need this, but just in case
 # u32 index_offset                 -- likewise
 # u32 data_offset
 # u32 zonetab_offset
 header_format = "! 12s i i i i"
 header_size = struct.calcsize(header_format)
 index_offset = header_size
 index_bytes = open('%s/zoneinfo.idx' % bionic_libc_zoneinfo_dir, "rb").read()
 index_size = len(index_bytes)
 data_offset = index_offset + index_size
 data_bytes = open('%s/zoneinfo.dat' % bionic_libc_zoneinfo_dir).read()
 data_size = len(data_bytes)
 zonetab_offset = 0 # TODO: data_offset + data_size
 tzdata_version = current_tzdata_version()
 file_format_version = 1
 header = struct.pack(header_format, 'tzdata%s' % tzdata_version, file_format_version, index_offset, data_offset, zonetab_offset)
 f.write(header)
 # -- index (@index_offset)
 # u8* index_bytes
 f.write(index_bytes)
 # -- data (@data_offset)
 # u8* data_bytes
 f.write(data_bytes)
 # TODO: zonetab
 # -- zonetab (@zonetab_offset)
 # u8* zonetab_bytes
 f.close()
 sys.exit(0)
--- a/libc/zoneinfo/zoneinfo.dat
+++ b/libc/zoneinfo/zoneinfo.dat
--- a/libc/zoneinfo/zoneinfo.idx
+++ b/libc/zoneinfo/zoneinfo.idx
--- a/libc/zoneinfo/zoneinfo.version
+++ b/libc/zoneinfo/zoneinfo.version
@@ -1 +0,0 @@
 2012g