sample_muxer: added WebVTT support

Change-Id: If72d31ca4828adf39e4637003979a314e5dda98e
2012-08-14 16:40:33 -07:00
parent 8f0c3333d1
commit 7ef225de9f
7 changed files with 496 additions and 45 deletions
--- a/5
+++ b/5
@@ -8,6 +8,7 @@ OBJSSO    := $(WEBMOBJS:.o=_so.o)
 OBJECTS1  := sample.o
 OBJECTS2  := sample_muxer.o
 OBJECTS3  := dumpvtt.o vttreader.o webvttparser.o
 OBJECTS4  := vttreader.o webvttparser.o sample_muxer_metadata.o
 INCLUDES  := -I.
 EXES      := samplemuxer sample dumpvtt
@@ -16,7 +17,7 @@ all: $(EXES)
 sample: sample.o $(LIBWEBMA)
 	$(CXX) $^ -o $@
-samplemuxer: sample_muxer.o $(LIBWEBMA)
+samplemuxer: sample_muxer.o $(LIBWEBMA) $(OBJECTS4)
 	$(CXX) $^ -o $@
 dumpvtt: $(OBJECTS3)
@@ -40,4 +41,4 @@ libwebm.so: $(OBJSSO)
 	$(CXX) -c $(CXXFLAGS) -fPIC $(INCLUDES) $< -o $@
 clean:
-	$(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak
+	$(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJECTS4) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak
--- a/mkvmuxer.cpp
+++ b/mkvmuxer.cpp
@@ -1965,27 +1965,15 @@ int Segment::TestFrame(uint64 track_number,
  if (frame_timecode < last_cluster_timecode)  // should never happen
    return -1;  // error
  // Handle the case when the frame we are testing has a timestamp
  // equal to the cluster's timestamp.  This can happen if some
  // non-video keyframe (that is, a WebVTT cue or audio block) first
  // creates the initial cluster (at t=0), and then we test a video
  // keyframe.  We don't want to create a new cluster just yet (see
  // the predicate below, which specifies the creation of a new
  // cluster when a video keyframe is detected); instead we want to
  // force the frame to be written to the existing cluster.
  if (frame_timecode == last_cluster_timecode)
    return 0;
  // If the frame has a timestamp significantly larger than the last
  // cluster (in Matroska, cluster-relative timestamps are serialized
  // using a 16-bit signed integer), then we cannot write this frame
-  // that cluster, and so we must create a new cluster.
+  // to that cluster, and so we must create a new cluster.
  const int64 delta_timecode = frame_timecode - last_cluster_timecode;
  if (delta_timecode > std::numeric_limits<int16>::max())
-    return 1;
+    return 2;
  // We decide to create a new cluster when we have a video keyframe.
  // This will flush queued (audio) frames, and write the keyframe
@@ -2095,6 +2083,7 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
 bool Segment::DoNewClusterProcessing(uint64 track_number,
                                     uint64 frame_timestamp_ns,
                                     bool is_key) {
  for (;;) {
    // Based on the characteristics of the current frame and current
    // cluster, decide whether to create a new cluster.
    const int result = TestFrame(track_number, frame_timestamp_ns, is_key);
@@ -2112,7 +2101,13 @@ bool Segment::DoNewClusterProcessing(uint64 track_number,
    // Write the current frame to the current cluster (if TestFrame
    // returns 0) or to a newly created cluster (TestFrame returns 1).
    if (result <= 1)
      return true;
    // TestFrame returned 2, which means there was a large time
    // difference between the cluster and the frame itself.  Do the
    // test again, comparing the frame to the new cluster.
  }
 }
 bool Segment::CheckHeaderInfo() {
--- a/mkvmuxer.hpp
+++ b/mkvmuxer.hpp
@@ -834,6 +834,7 @@ class Segment {
  //  -1 = error: an out-of-order frame was detected
  //  0 = do not create a new cluster, and write frame to the existing cluster
  //  1 = create a new cluster, and write frame to that new cluster
  //  2 = create a new cluster, and re-run test
  int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const;
  // Create a new cluster, using the earlier of the first enqueued
--- a/mkvmuxerutil.cpp
+++ b/mkvmuxerutil.cpp
@@ -387,6 +387,8 @@ uint64 WriteMetadataBlock(IMkvWriter* writer,
  // We use a single byte for the track number of the block, which
  // means the block header is exactly 4 bytes.
  // TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement
  const uint64 block_payload_size = 4 + length;
  const int32 block_size = GetCodedUIntSize(block_payload_size);
  const uint64 block_elem_size = 1 + block_size + block_payload_size;
@@ -437,7 +439,7 @@ uint64 WriteMetadataBlock(IMkvWriter* writer,
  // Write Duration element
-  if (WriteID(writer, kMkvDuration))  // 1-byte ID size
+  if (WriteID(writer, kMkvBlockDuration))  // 1-byte ID size
    return 0;
  if (WriteUInt(writer, duration_payload_size))
--- a/sample_muxer.cpp
+++ b/sample_muxer.cpp
@@ -9,6 +9,8 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <list>
 #include <string>
 // libwebm parser includes
 #include "mkvreader.hpp"
@@ -19,6 +21,10 @@
 #include "mkvwriter.hpp"
 #include "mkvmuxerutil.hpp"
 #include "sample_muxer_metadata.h"
 using mkvmuxer::uint64;
 namespace {
 void Usage() {
@@ -47,13 +53,87 @@ void Usage() {
  printf("\n");
  printf("Cues options:\n");
  printf("  -output_cues_block_number <int> >0 outputs cue block number\n");
  printf("\n");
  printf("Metadata options:\n");
  printf("  -webvtt-subtitles <vttfile>    "
         "add WebVTT subtitles as metadata track\n");
  printf("  -webvtt-captions <vttfile>     "
         "add WebVTT captions as metadata track\n");
  printf("  -webvtt-descriptions <vttfile> "
         "add WebVTT descriptions as metadata track\n");
  printf("  -webvtt-metadata <vttfile>     "
         "add WebVTT subtitles as metadata track\n");
 }
-} //end namespace
+struct MetadataFile {
  // One WebVTT input named on the command line.
  // |name| is the argv entry that followed the -webvtt-* option (the
  // file path); it is not copied, only the pointer is stored.
  const char* name;
  // |kind| is the kind associated with the -webvtt-* option that
  // introduced the file.
  SampleMuxerMetadata::Kind kind;
 };
 typedef std::list<MetadataFile> metadata_files_t;
 // Cache the WebVTT filenames specified as command-line args.
 bool LoadMetadataFiles(
    const metadata_files_t& files,
    SampleMuxerMetadata* metadata) {
  typedef metadata_files_t::const_iterator iter_t;
  iter_t i = files.begin();
  const iter_t j = files.end();
  while (i != j) {
    const metadata_files_t::value_type& v = *i++;
    if (!metadata->Load(v.name, v.kind))
      return false;
  }
  return true;
 }
 int ParseArgWebVTT(
    char* argv[],
    int* argv_index,
    int argc_check,
    metadata_files_t* metadata_files) {
  int& i = *argv_index;
  enum { kCount = 4 };
  struct Arg { const char* name; SampleMuxerMetadata::Kind kind; };
  const Arg args[kCount] = {
    { "-webvtt-subtitles", SampleMuxerMetadata::kSubtitles },
    { "-webvtt-captions", SampleMuxerMetadata::kCaptions },
    { "-webvtt-descriptions", SampleMuxerMetadata::kDescriptions },
    { "-webvtt-metadata", SampleMuxerMetadata::kMetadata }
  };
  for (int idx = 0; idx < kCount; ++idx) {
    const Arg& arg = args[idx];
    if (strcmp(arg.name, argv[i]) != 0)  // no match
      continue;
    ++i;  // consume arg name here
    if (i > argc_check) {
      printf("missing value for %s\n", arg.name);
      return -1;  // error
    }
    MetadataFile f;
    f.name = argv[i];  // arg value is consumed via caller's loop idx
    f.kind = arg.kind;
    metadata_files->push_back(f);
    return 1;  // successfully parsed WebVTT arg
  }
  return 0;  // not a WebVTT arg
 }
 } // end namespace
 int main(int argc, char* argv[]) {
  using mkvmuxer::uint64;
  char* input = NULL;
  char* output = NULL;
@@ -78,6 +158,8 @@ int main(int argc, char* argv[]) {
  uint64 display_height = 0;
  uint64 stereo_mode = 0;
  metadata_files_t metadata_files;
  const int argc_check = argc - 1;
  for (int i = 1; i < argc; ++i) {
    char* end;
@@ -130,6 +212,9 @@ int main(int argc, char* argv[]) {
               i < argc_check) {
      output_cues_block_number =
          strtol(argv[++i], &end, 10) == 0 ? false : true;
    } else if (int e = ParseArgWebVTT(argv, &i, argc_check, &metadata_files)) {
      if (e < 0)
        return EXIT_FAILURE;
    }
  }
@@ -204,12 +289,13 @@ int main(int argc, char* argv[]) {
  info->set_writing_app("sample_muxer");
  // Set Tracks element attributes
  enum { kVideoTrack = 1, kAudioTrack = 2 };
  const mkvparser::Tracks* const parser_tracks = parser_segment->GetTracks();
  unsigned long i = 0;
  uint64 vid_track = 0; // no track added
  uint64 aud_track = 0; // no track added
  using mkvparser::Track;
  while (i != parser_tracks->GetTracksCount()) {
    int track_num = i++;
    if (switch_tracks)
@@ -226,7 +312,7 @@ int main(int argc, char* argv[]) {
    const long long track_type = parser_track->GetType();
-    if (track_type == kVideoTrack && output_video) {
+    if (track_type == Track::kVideo && output_video) {
      // Get the video track from the parser
      const mkvparser::VideoTrack* const pVideoTrack =
          static_cast<const mkvparser::VideoTrack*>(parser_track);
@@ -264,7 +350,7 @@ int main(int argc, char* argv[]) {
      if (rate > 0.0) {
        video->set_frame_rate(rate);
      }
-    } else if (track_type == kAudioTrack && output_audio) {
+    } else if (track_type == Track::kAudio && output_audio) {
      // Get the audio track from the parser
      const mkvparser::AudioTrack* const pAudioTrack =
          static_cast<const mkvparser::AudioTrack*>(parser_track);
@@ -307,6 +393,17 @@ int main(int argc, char* argv[]) {
    }
  }
  // We have created all the video and audio tracks.  If any WebVTT
  // files were specified as command-line args, then parse them and
  // add a track to the output file corresponding to each metadata
  // input file.
  SampleMuxerMetadata metadata;
  metadata.Init(&muxer_segment);
  if (!LoadMetadataFiles(metadata_files, &metadata))
    return EXIT_FAILURE;
  // Set Cues element attributes
  mkvmuxer::Cues* const cues = muxer_segment.GetCues();
  cues->set_output_block_number(output_cues_block_number);
@@ -339,11 +436,16 @@ int main(int argc, char* argv[]) {
          parser_tracks->GetTrackByNumber(
              static_cast<unsigned long>(trackNum));
      const long long track_type = parser_track->GetType();
      if ((track_type == kAudioTrack && output_audio) ||
          (track_type == kVideoTrack && output_video)) {
        const int frame_count = block->GetFrameCount();
      const long long time_ns = block->GetTime(cluster);
      // Flush any metadata frames to the output file, before we write
      // the current block.
      if (!metadata.Write(time_ns))
        return EXIT_FAILURE;
      if ((track_type == Track::kAudio && output_audio) ||
          (track_type == Track::kVideo && output_video)) {
        const int frame_count = block->GetFrameCount();
        const bool is_key = block->IsKey();
        for (int i = 0; i < frame_count; ++i) {
@@ -361,7 +463,7 @@ int main(int argc, char* argv[]) {
            return EXIT_FAILURE;
          uint64 track_num = vid_track;
-          if (track_type == kAudioTrack)
+          if (track_type == Track::kAudio)
            track_num = aud_track;
          if (!muxer_segment.AddFrame(data,
@@ -387,6 +489,11 @@ int main(int argc, char* argv[]) {
    cluster = parser_segment->GetNext(cluster);
  }
  // We have exhausted all video and audio frames in the input file.
  // Flush any remaining metadata frames to the output file.
  if (!metadata.Write(-1))
    return EXIT_FAILURE;
  muxer_segment.Finalize();
  delete [] data;
@@ -397,6 +504,3 @@ int main(int argc, char* argv[]) {
  return EXIT_SUCCESS;
 }
--- a/sample_muxer_metadata.cc
+++ b/sample_muxer_metadata.cc
@@ -0,0 +1,236 @@
 #include "sample_muxer_metadata.h"
 #include <cstdio>
 #include <string>
 #include "vttreader.h"
 using std::string;
 // Constructs an object that is not bound to any segment: |segment_|
 // stays NULL until Init() is called.
 SampleMuxerMetadata::SampleMuxerMetadata() : segment_(NULL) {
 }
 // Remembers |s| as the segment that tracks are added to and that
 // cues are written to.  The pointer is stored, not copied, so the
 // segment must outlive its use by this object.
 void SampleMuxerMetadata::Init(mkvmuxer::Segment* s) {
  segment_ = s;
 }
 // Adds a track of the given |kind| to the segment, then parses the
 // WebVTT |file| and queues its cues for that track.  Returns false
 // if the track could not be added or the file could not be parsed.
 bool SampleMuxerMetadata::Load(const char* file, Kind kind) {
  mkvmuxer::uint64 track_num;
  const bool track_added = AddTrack(kind, &track_num);
  if (track_added)
    return Parse(file, kind, track_num);
  printf("Unable to add track for WebVTT file \"%s\"\n", file);
  return false;
 }
 // Writes queued cues to the segment in sorted order, removing each
 // one from the queue once it has been written.  When |time_ns| is
 // non-negative, writing stops at the first cue that starts after
 // |time_ns|; when it is negative, every remaining cue is written.
 // Returns false if a cue could not be written.
 bool SampleMuxerMetadata::Write(mkvmuxer::int64 time_ns) {
  const bool has_time_limit = (time_ns >= 0);
  while (!cues_set_.empty()) {
    // The earliest pending cue is always the first element.
    const cues_set_t::iterator earliest = cues_set_.begin();
    if (has_time_limit && *earliest > time_ns)
      return true;  // nothing else to do just yet
    if (!earliest->Write(segment_)) {
      printf("\nCould not add metadata.\n");
      return false;  // error
    }
    cues_set_.erase(earliest);
  }
  return true;
 }
 // Adds a metadata track for |kind| to the bound segment and stores
 // the track number chosen by the muxer in |*track_num|.
 //
 // Returns false, leaving |*track_num| set to 0 and the segment
 // untouched, if no segment has been bound via Init() or if |kind| is
 // not a recognized value.  Also returns false (with |*track_num| 0)
 // if the segment refuses to add the track.
 bool SampleMuxerMetadata::AddTrack(
    Kind kind,
    mkvmuxer::uint64* track_num) {
  *track_num = 0;
  if (segment_ == NULL)  // Init() has not been called
    return false;
  // Resolve the track type and codec id before touching the segment,
  // so that an unrecognized |kind| cannot leave behind a track that
  // has neither a type nor a codec id.
  int type;
  const char* codec_id;
  switch (kind) {
  case kSubtitles:
    type = 0x11;  // Matroska TrackType "subtitle"
    codec_id = "D_WEBVTT/SUBTITLES";
    break;
  case kCaptions:
    type = 0x11;  // Matroska TrackType "subtitle"
    codec_id = "D_WEBVTT/CAPTIONS";
    break;
  case kDescriptions:
    type = 0x21;  // Matroska TrackType "metadata"
    codec_id = "D_WEBVTT/DESCRIPTIONS";
    break;
  case kMetadata:
    type = 0x21;  // Matroska TrackType "metadata"
    codec_id = "D_WEBVTT/METADATA";
    break;
  default:
    return false;
  }
  // Track number value 0 means "let muxer choose track number"
  mkvmuxer::Track* const track = segment_->AddTrack(0);
  if (track == NULL)  // error
    return false;
  // Return the track number value chosen by the muxer
  *track_num = track->number();
  track->set_type(type);
  track->set_codec_id(codec_id);
  // TODO(matthewjheaney): set name and language
  return true;
 }
 bool SampleMuxerMetadata::Parse(
    const char* file,
    Kind /* kind */,
    mkvmuxer::uint64 track_num) {
  libwebvtt::VttReader r;
  int e = r.Open(file);
  if (e) {
    printf("Unable to open WebVTT file: \"%s\"\n", file);
    return false;
  }
  libwebvtt::Parser p(&r);
  e = p.Init();
  if (e < 0) {  // error
    printf("Error parsing WebVTT file: \"%s\"\n", file);
    return false;
  }
  SortableCue cue;
  cue.track_num = track_num;
  libwebvtt::Time t;
  t.hours = -1;
  for (;;) {
    cue_t& c = cue.cue;
    e = p.Parse(&c);
    if (e < 0) {  // error
      printf("Error parsing WebVTT file: \"%s\"\n", file);
      return false;
    }
    if (e > 0)  // EOF
      return true;
    if (c.start_time >= t) {
      t = c.start_time;
    } else {
      printf("bad WebVTT cue timestamp (out-of-order)\n");
      return false;
    }
    if (c.stop_time < c.start_time) {
      printf("bad WebVTT cue timestamp (stop < start)\n");
      return false;
    }
    cues_set_.insert(cue);
  }
 }
 // Serializes the cue |c| into |pf|: the cue identifier line, then
 // the cue settings line, then the payload lines.  Any previous
 // contents of |pf| are discarded.
 void SampleMuxerMetadata::MakeFrame(const cue_t& c, string* pf) {
  pf->clear();
  WriteCueIdentifier(c.identifier, pf);
  WriteCueSettings(c.settings, pf);
  WriteCuePayload(c.payload, pf);
 }
 void SampleMuxerMetadata::WriteCueIdentifier(
    const string& identifier,
    string* pf) {
  pf->append(identifier);
  pf->push_back('\x0A');  // LF
 }
 // Appends the cue |settings| to |pf| as a single line of name:value
 // pairs separated by one space, terminated by a line feed.  With no
 // settings, only the line feed is appended.
 void SampleMuxerMetadata::WriteCueSettings(
    const cue_t::settings_t& settings,
    string* pf) {
  typedef cue_t::settings_t::const_iterator setting_iter_t;
  const setting_iter_t first = settings.begin();
  const setting_iter_t last = settings.end();
  for (setting_iter_t it = first; it != last; ++it) {
    if (it != first)
      pf->push_back(' ');  // separate settings with whitespace
    pf->append(it->name);
    pf->push_back(':');
    pf->append(it->value);
  }
  pf->push_back('\x0A');  // LF
 }
 // Appends each line of the cue |payload| to |pf|, terminating every
 // line with a line feed.  An empty payload appends nothing.
 void SampleMuxerMetadata::WriteCuePayload(
    const cue_t::payload_t& payload,
    string* pf) {
  typedef cue_t::payload_t::const_iterator line_iter_t;
  for (line_iter_t it = payload.begin(); it != payload.end(); ++it) {
    pf->append(*it);
    pf->push_back('\x0A');  // LF
  }
 }
 // Serializes this cue and adds it to |segment| as a metadata block
 // on track |track_num|, timestamped with the cue start time and
 // carrying the cue duration (stop minus start).  Returns whatever
 // the segment's AddMetadata call returns.
 bool SampleMuxerMetadata::SortableCue::Write(
    mkvmuxer::Segment* segment) const {
  // WebVTT presentation times are in milliseconds; MKV time is in
  // nanoseconds.  The values are widened to int64 before scaling.
  const mkvmuxer::int64 begin_ms = cue.start_time.presentation();
  const mkvmuxer::int64 end_ms = cue.stop_time.presentation();
  const mkvmuxer::int64 begin_ns = begin_ms * 1000000;
  const mkvmuxer::int64 end_ns = end_ms * 1000000;
  // Metadata blocks always specify the block duration.
  const mkvmuxer::int64 length_ns = end_ns - begin_ns;
  string serialized;
  MakeFrame(cue, &serialized);
  const mkvmuxer::uint8* const bytes =
      reinterpret_cast<const mkvmuxer::uint8*>(serialized.data());
  const mkvmuxer::uint64 byte_count = serialized.length();
  return segment->AddMetadata(bytes,
                              byte_count,
                              track_num,
                              begin_ns,
                              length_ns);
 }
--- a/sample_muxer_metadata.h
+++ b/sample_muxer_metadata.h
@@ -0,0 +1,112 @@
 // Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 //
 // Use of this source code is governed by a BSD-style license
 // that can be found in the LICENSE file in the root of the source
 // tree. An additional intellectual property rights grant can be found
 // in the file PATENTS.  All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.
 #ifndef SAMPLE_MUXER_METADATA_H_  // NOLINT
 #define SAMPLE_MUXER_METADATA_H_
 #include <list>
 #include <set>
 #include "mkvmuxer.hpp"
 #include "webvttparser.h"
 // Holds the cues parsed from WebVTT files and writes them, in time
 // order, to a muxer segment as metadata blocks.  Each loaded file
 // gets its own track in the segment.
 class SampleMuxerMetadata {
 public:
  // The kind of WebVTT file being loaded.  The kind determines the
  // track type and codec id given to the file's track.
  enum Kind {
    kSubtitles,
    kCaptions,
    kDescriptions,
    kMetadata
  };
  // Constructs an object that is not yet bound to a segment; call
  // Init() before loading or writing.
  SampleMuxerMetadata();
  // Bind this metadata object to the muxer instance.
  void Init(mkvmuxer::Segment* segment);
  // Parse the WebVTT file |filename| having the indicated |kind|, and
  // create a corresponding track in the segment.  Returns false on
  // error.
  bool Load(const char* filename, Kind kind);
  // Write any WebVTT cues whose time is less than or equal to
  // |time_ns| as a metadata block in its corresponding track.  If
  // |time_ns| is negative, write all remaining cues. Returns false on
  // error.
  bool Write(mkvmuxer::int64 time_ns);
 private:
  typedef libwebvtt::Cue cue_t;
  // Used to sort cues as they are loaded.
  struct SortableCue {
    // Returns true if this cue starts strictly later than |time_ns|
    // (a time expressed in nanoseconds).
    bool operator>(mkvmuxer::int64 time_ns) const {
      // Cue start time expressed in milliseconds
      const mkvmuxer::int64 start_ms = cue.start_time.presentation();
      // Cue start time expressed in nanoseconds (MKV time)
      const mkvmuxer::int64 start_ns = start_ms * 1000000;
      return (start_ns > time_ns);
    }
    // Orders cues by start time; cues with equal start times are
    // ordered by track number.
    bool operator<(const SortableCue& rhs) const {
      if (cue.start_time < rhs.cue.start_time)
        return true;
      if (cue.start_time > rhs.cue.start_time)
        return false;
      return (track_num < rhs.track_num);
    }
    // Write this cue as a metablock to |segment|.  Returns false on
    // error.
    bool Write(mkvmuxer::Segment* segment) const;
    // Number of the track this cue is written to.
    mkvmuxer::uint64 track_num;
    // The parsed WebVTT cue.
    cue_t cue;
  };
  // A multiset, so that cues comparing equivalent (same start time
  // and same track number) are all retained.
  typedef std::multiset<SortableCue> cues_set_t;
  // Add a metadata track to the segment having the indicated |kind|,
  // returning the |track_num| that has been chosen for this track.
  // Returns false on error.
  bool AddTrack(Kind kind, mkvmuxer::uint64* track_num);
  // Parse the WebVTT |file| having the indicated |kind| and
  // |track_num|, adding each parsed cue to cues set.  Returns false
  // on error.
  bool Parse(const char* file, Kind kind, mkvmuxer::uint64 track_num);
  // Converts a WebVTT cue to a Matroska metadata block.
  static void MakeFrame(const cue_t& cue, std::string* frame);
  // Populate the cue identifier part of the metadata block.
  static void WriteCueIdentifier(const std::string& identifier,
                                 std::string* frame);
  // Populate the cue settings part of the metadata block.
  static void WriteCueSettings(const cue_t::settings_t& settings,
                               std::string* frame);
  // Populate the payload part of the metadata block.
  static void WriteCuePayload(const cue_t::payload_t& payload,
                              std::string* frame);
  // The segment bound by Init(); not owned by this object.
  mkvmuxer::Segment* segment_;
  // Set of cues ordered by time and then by track number.
  cues_set_t cues_set_;
  // Disable copy ctor and copy assign.
  SampleMuxerMetadata(const SampleMuxerMetadata&);
  SampleMuxerMetadata& operator=(const SampleMuxerMetadata&);
 };
 #endif  // SAMPLE_MUXER_METADATA_H_  // NOLINT