diff --git a/Makefile b/Makefile index a6617a9..6520726 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ OBJSSO := $(WEBMOBJS:.o=_so.o) OBJECTS1 := sample.o OBJECTS2 := sample_muxer.o OBJECTS3 := dumpvtt.o vttreader.o webvttparser.o +OBJECTS4 := vttreader.o webvttparser.o sample_muxer_metadata.o INCLUDES := -I. EXES := samplemuxer sample dumpvtt @@ -16,7 +17,7 @@ all: $(EXES) sample: sample.o $(LIBWEBMA) $(CXX) $^ -o $@ -samplemuxer: sample_muxer.o $(LIBWEBMA) +samplemuxer: sample_muxer.o $(LIBWEBMA) $(OBJECTS4) $(CXX) $^ -o $@ dumpvtt: $(OBJECTS3) @@ -40,4 +41,4 @@ libwebm.so: $(OBJSSO) $(CXX) -c $(CXXFLAGS) -fPIC $(INCLUDES) $< -o $@ clean: - $(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak + $(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJECTS4) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak diff --git a/mkvmuxer.cpp b/mkvmuxer.cpp index 20d9302..27ea7de 100644 --- a/mkvmuxer.cpp +++ b/mkvmuxer.cpp @@ -1965,27 +1965,15 @@ int Segment::TestFrame(uint64 track_number, if (frame_timecode < last_cluster_timecode) // should never happen return -1; // error - // Handle the case when the frame we are testing has a timestamp - // equal to the cluster's timestamp. This can happen if some - // non-video keyframe (that is, a WebVTT cue or audio block) first - // creates the initial cluster (at t=0), and then we test a video - // keyframe. We don't want to create a new cluster just yet (see - // the predicate below, which specifies the creation of a new - // cluster when a video keyframe is detected); instead we want to - // force the frame to be written to the existing cluster. 
- - if (frame_timecode == last_cluster_timecode) - return 0; - // If the frame has a timestamp significantly larger than the last // cluster (in Matroska, cluster-relative timestamps are serialized // using a 16-bit signed integer), then we cannot write this frame - // that cluster, and so we must create a new cluster. + // to that cluster, and so we must create a new cluster. const int64 delta_timecode = frame_timecode - last_cluster_timecode; if (delta_timecode > std::numeric_limits::max()) - return 1; + return 2; // We decide to create a new cluster when we have a video keyframe. // This will flush queued (audio) frames, and write the keyframe @@ -2095,24 +2083,31 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { bool Segment::DoNewClusterProcessing(uint64 track_number, uint64 frame_timestamp_ns, bool is_key) { - // Based on the characteristics of the current frame and current - // cluster, decide whether to create a new cluster. - const int result = TestFrame(track_number, frame_timestamp_ns, is_key); - if (result < 0) // error - return false; + for (;;) { + // Based on the characteristics of the current frame and current + // cluster, decide whether to create a new cluster. + const int result = TestFrame(track_number, frame_timestamp_ns, is_key); + if (result < 0) // error + return false; - // A non-zero result means create a new cluster. - if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) - return false; + // A non-zero result means create a new cluster. + if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) + return false; - // Write queued (audio) frames. - const int frame_count = WriteFramesAll(); - if (frame_count < 0) // error - return false; + // Write queued (audio) frames. + const int frame_count = WriteFramesAll(); + if (frame_count < 0) // error + return false; - // Write the current frame to the current cluster (if TestFrame - // returns 0) or to a newly created cluster (TestFrame returns 1). 
- return true; + // Write the current frame to the current cluster (if TestFrame + // returns 0) or to a newly created cluster (TestFrame returns 1). + if (result <= 1) + return true; + + // TestFrame returned 2, which means there was a large time + // difference between the cluster and the frame itself. Do the + // test again, comparing the frame to the new cluster. + } } bool Segment::CheckHeaderInfo() { diff --git a/mkvmuxer.hpp b/mkvmuxer.hpp index 3e9b592..537361e 100644 --- a/mkvmuxer.hpp +++ b/mkvmuxer.hpp @@ -834,6 +834,7 @@ class Segment { // -1 = error: an out-of-order frame was detected // 0 = do not create a new cluster, and write frame to the existing cluster // 1 = create a new cluster, and write frame to that new cluster + // 2 = create a new cluster, and re-run test int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const; // Create a new cluster, using the earlier of the first enqueued diff --git a/mkvmuxerutil.cpp b/mkvmuxerutil.cpp index a22851a..87cc875 100644 --- a/mkvmuxerutil.cpp +++ b/mkvmuxerutil.cpp @@ -387,6 +387,8 @@ uint64 WriteMetadataBlock(IMkvWriter* writer, // We use a single byte for the track number of the block, which // means the block header is exactly 4 bytes. 
+ // TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement + const uint64 block_payload_size = 4 + length; const int32 block_size = GetCodedUIntSize(block_payload_size); const uint64 block_elem_size = 1 + block_size + block_payload_size; @@ -437,7 +439,7 @@ uint64 WriteMetadataBlock(IMkvWriter* writer, // Write Duration element - if (WriteID(writer, kMkvDuration)) // 1-byte ID size + if (WriteID(writer, kMkvBlockDuration)) // 1-byte ID size return 0; if (WriteUInt(writer, duration_payload_size)) diff --git a/sample_muxer.cpp b/sample_muxer.cpp index 6f94861..baba2d2 100644 --- a/sample_muxer.cpp +++ b/sample_muxer.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include // libwebm parser includes #include "mkvreader.hpp" @@ -19,6 +21,10 @@ #include "mkvwriter.hpp" #include "mkvmuxerutil.hpp" +#include "sample_muxer_metadata.h" + +using mkvmuxer::uint64; + namespace { void Usage() { @@ -47,13 +53,87 @@ void Usage() { printf("\n"); printf("Cues options:\n"); printf(" -output_cues_block_number >0 outputs cue block number\n"); + printf("\n"); + printf("Metadata options:\n"); + printf(" -webvtt-subtitles " + "add WebVTT subtitles as metadata track\n"); + printf(" -webvtt-captions " + "add WebVTT captions as metadata track\n"); + printf(" -webvtt-descriptions " + "add WebVTT descriptions as metadata track\n"); + printf(" -webvtt-metadata " + "add WebVTT metadata as metadata track\n"); } -} //end namespace +struct MetadataFile { + const char* name; + SampleMuxerMetadata::Kind kind; +}; + +typedef std::list metadata_files_t; + +// Cache the WebVTT filenames specified as command-line args. 
+bool LoadMetadataFiles( + const metadata_files_t& files, + SampleMuxerMetadata* metadata) { + typedef metadata_files_t::const_iterator iter_t; + + iter_t i = files.begin(); + const iter_t j = files.end(); + + while (i != j) { + const metadata_files_t::value_type& v = *i++; + + if (!metadata->Load(v.name, v.kind)) + return false; + } + + return true; +} + +int ParseArgWebVTT( + char* argv[], + int* argv_index, + int argc_check, + metadata_files_t* metadata_files) { + int& i = *argv_index; + + enum { kCount = 4 }; + struct Arg { const char* name; SampleMuxerMetadata::Kind kind; }; + const Arg args[kCount] = { + { "-webvtt-subtitles", SampleMuxerMetadata::kSubtitles }, + { "-webvtt-captions", SampleMuxerMetadata::kCaptions }, + { "-webvtt-descriptions", SampleMuxerMetadata::kDescriptions }, + { "-webvtt-metadata", SampleMuxerMetadata::kMetadata } + }; + + for (int idx = 0; idx < kCount; ++idx) { + const Arg& arg = args[idx]; + + if (strcmp(arg.name, argv[i]) != 0) // no match + continue; + + ++i; // consume arg name here + + if (i > argc_check) { + printf("missing value for %s\n", arg.name); + return -1; // error + } + + MetadataFile f; + f.name = argv[i]; // arg value is consumed via caller's loop idx + f.kind = arg.kind; + + metadata_files->push_back(f); + return 1; // successfully parsed WebVTT arg + } + + return 0; // not a WebVTT arg +} + +} // end namespace int main(int argc, char* argv[]) { - using mkvmuxer::uint64; - char* input = NULL; char* output = NULL; @@ -78,6 +158,8 @@ int main(int argc, char* argv[]) { uint64 display_height = 0; uint64 stereo_mode = 0; + metadata_files_t metadata_files; + const int argc_check = argc - 1; for (int i = 1; i < argc; ++i) { char* end; @@ -130,6 +212,9 @@ int main(int argc, char* argv[]) { i < argc_check) { output_cues_block_number = strtol(argv[++i], &end, 10) == 0 ? 
false : true; + } else if (int e = ParseArgWebVTT(argv, &i, argc_check, &metadata_files)) { + if (e < 0) + return EXIT_FAILURE; } } @@ -204,12 +289,13 @@ int main(int argc, char* argv[]) { info->set_writing_app("sample_muxer"); // Set Tracks element attributes - enum { kVideoTrack = 1, kAudioTrack = 2 }; const mkvparser::Tracks* const parser_tracks = parser_segment->GetTracks(); unsigned long i = 0; uint64 vid_track = 0; // no track added uint64 aud_track = 0; // no track added + using mkvparser::Track; + while (i != parser_tracks->GetTracksCount()) { int track_num = i++; if (switch_tracks) @@ -226,7 +312,7 @@ int main(int argc, char* argv[]) { const long long track_type = parser_track->GetType(); - if (track_type == kVideoTrack && output_video) { + if (track_type == Track::kVideo && output_video) { // Get the video track from the parser const mkvparser::VideoTrack* const pVideoTrack = static_cast(parser_track); @@ -264,7 +350,7 @@ int main(int argc, char* argv[]) { if (rate > 0.0) { video->set_frame_rate(rate); } - } else if (track_type == kAudioTrack && output_audio) { + } else if (track_type == Track::kAudio && output_audio) { // Get the audio track from the parser const mkvparser::AudioTrack* const pAudioTrack = static_cast(parser_track); @@ -307,6 +393,17 @@ int main(int argc, char* argv[]) { } } + // We have created all the video and audio tracks. If any WebVTT + // files were specified as command-line args, then parse them and + // add a track to the output file corresponding to each metadata + // input file. 
+ + SampleMuxerMetadata metadata; + metadata.Init(&muxer_segment); + + if (!LoadMetadataFiles(metadata_files, &metadata)) + return EXIT_FAILURE; + // Set Cues element attributes mkvmuxer::Cues* const cues = muxer_segment.GetCues(); cues->set_output_block_number(output_cues_block_number); @@ -339,11 +436,16 @@ int main(int argc, char* argv[]) { parser_tracks->GetTrackByNumber( static_cast(trackNum)); const long long track_type = parser_track->GetType(); + const long long time_ns = block->GetTime(cluster); - if ((track_type == kAudioTrack && output_audio) || - (track_type == kVideoTrack && output_video)) { + // Flush any metadata frames to the output file, before we write + // the current block. + if (!metadata.Write(time_ns)) + return EXIT_FAILURE; + + if ((track_type == Track::kAudio && output_audio) || + (track_type == Track::kVideo && output_video)) { const int frame_count = block->GetFrameCount(); - const long long time_ns = block->GetTime(cluster); const bool is_key = block->IsKey(); for (int i = 0; i < frame_count; ++i) { @@ -361,7 +463,7 @@ int main(int argc, char* argv[]) { return EXIT_FAILURE; uint64 track_num = vid_track; - if (track_type == kAudioTrack) + if (track_type == Track::kAudio) track_num = aud_track; if (!muxer_segment.AddFrame(data, @@ -387,6 +489,11 @@ int main(int argc, char* argv[]) { cluster = parser_segment->GetNext(cluster); } + // We have exhausted all video and audio frames in the input file. + // Flush any remaining metadata frames to the output file. 
+ if (!metadata.Write(-1)) + return EXIT_FAILURE; + muxer_segment.Finalize(); delete [] data; @@ -397,6 +504,3 @@ int main(int argc, char* argv[]) { return EXIT_SUCCESS; } - - - diff --git a/sample_muxer_metadata.cc b/sample_muxer_metadata.cc new file mode 100644 index 0000000..0288b27 --- /dev/null +++ b/sample_muxer_metadata.cc @@ -0,0 +1,236 @@ +#include "sample_muxer_metadata.h" +#include +#include "vttreader.h" + +using std::string; + +SampleMuxerMetadata::SampleMuxerMetadata() : segment_(NULL) { +} + +void SampleMuxerMetadata::Init(mkvmuxer::Segment* s) { + segment_ = s; +} + +bool SampleMuxerMetadata::Load(const char* file, Kind kind) { + mkvmuxer::uint64 track_num; + + if (!AddTrack(kind, &track_num)) { + printf("Unable to add track for WebVTT file \"%s\"\n", file); + return false; + } + + return Parse(file, kind, track_num); +} + +bool SampleMuxerMetadata::Write(mkvmuxer::int64 time_ns) { + typedef cues_set_t::iterator iter_t; + + iter_t i = cues_set_.begin(); + const iter_t j = cues_set_.end(); + + while (i != j) { + const cues_set_t::value_type& v = *i; + + if (time_ns >= 0 && v > time_ns) + return true; // nothing else to do just yet + + if (!v.Write(segment_)) { + printf("\nCould not add metadata.\n"); + return false; // error + } + + cues_set_.erase(i++); + } + + return true; +} + +bool SampleMuxerMetadata::AddTrack( + Kind kind, + mkvmuxer::uint64* track_num) { + *track_num = 0; + + // Track number value 0 means "let muxer choose track number" + mkvmuxer::Track* const track = segment_->AddTrack(0); + + if (track == NULL) // error + return false; + + // Return the track number value chosen by the muxer + *track_num = track->number(); + + int type; + const char* codec_id; + + switch (kind) { + case kSubtitles: + type = 0x11; + codec_id = "D_WEBVTT/SUBTITLES"; + break; + + case kCaptions: + type = 0x11; + codec_id = "D_WEBVTT/CAPTIONS"; + break; + + case kDescriptions: + type = 0x21; + codec_id = "D_WEBVTT/DESCRIPTIONS"; + break; + + case kMetadata: + 
type = 0x21; + codec_id = "D_WEBVTT/METADATA"; + break; + + default: + return false; + } + + track->set_type(type); + track->set_codec_id(codec_id); + + // TODO(matthewjheaney): set name and language + + return true; +} + +bool SampleMuxerMetadata::Parse( + const char* file, + Kind /* kind */, + mkvmuxer::uint64 track_num) { + libwebvtt::VttReader r; + int e = r.Open(file); + + if (e) { + printf("Unable to open WebVTT file: \"%s\"\n", file); + return false; + } + + libwebvtt::Parser p(&r); + + e = p.Init(); + + if (e < 0) { // error + printf("Error parsing WebVTT file: \"%s\"\n", file); + return false; + } + + SortableCue cue; + cue.track_num = track_num; + + libwebvtt::Time t; + t.hours = -1; + + for (;;) { + cue_t& c = cue.cue; + e = p.Parse(&c); + + if (e < 0) { // error + printf("Error parsing WebVTT file: \"%s\"\n", file); + return false; + } + + if (e > 0) // EOF + return true; + + if (c.start_time >= t) { + t = c.start_time; + } else { + printf("bad WebVTT cue timestamp (out-of-order)\n"); + return false; + } + + if (c.stop_time < c.start_time) { + printf("bad WebVTT cue timestamp (stop < start)\n"); + return false; + } + + cues_set_.insert(cue); + } +} + +void SampleMuxerMetadata::MakeFrame(const cue_t& c, string* pf) { + pf->clear(); + WriteCueIdentifier(c.identifier, pf); + WriteCueSettings(c.settings, pf); + WriteCuePayload(c.payload, pf); +} + +void SampleMuxerMetadata::WriteCueIdentifier( + const string& identifier, + string* pf) { + pf->append(identifier); + pf->push_back('\x0A'); // LF +} + +void SampleMuxerMetadata::WriteCueSettings( + const cue_t::settings_t& settings, + string* pf) { + if (settings.empty()) { + pf->push_back('\x0A'); // LF + return; + } + + typedef cue_t::settings_t::const_iterator iter_t; + + iter_t i = settings.begin(); + const iter_t j = settings.end(); + + for (;;) { + const libwebvtt::Setting& setting = *i++; + + pf->append(setting.name); + pf->push_back(':'); + pf->append(setting.value); + + if (i == j) + break; + + 
+ pf->push_back(' '); // separate settings with whitespace + } + + pf->push_back('\x0A'); // LF +} + +void SampleMuxerMetadata::WriteCuePayload( + const cue_t::payload_t& payload, + string* pf) { + typedef cue_t::payload_t::const_iterator iter_t; + + iter_t i = payload.begin(); + const iter_t j = payload.end(); + + while (i != j) { + const string& line = *i++; + pf->append(line); + pf->push_back('\x0A'); // LF + } +} + +bool SampleMuxerMetadata::SortableCue::Write( + mkvmuxer::Segment* segment) const { + // Cue start time expressed in milliseconds + const mkvmuxer::int64 start_ms = cue.start_time.presentation(); + + // Cue start time expressed in nanoseconds (MKV time) + const mkvmuxer::int64 start_ns = start_ms * 1000000; + + // Cue stop time expressed in milliseconds + const mkvmuxer::int64 stop_ms = cue.stop_time.presentation(); + + // Cue stop time expressed in nanoseconds + const mkvmuxer::int64 stop_ns = stop_ms * 1000000; + + // Metadata blocks always specify the block duration. + const mkvmuxer::int64 duration_ns = stop_ns - start_ns; + + string frame; + MakeFrame(cue, &frame); + + typedef const mkvmuxer::uint8* data_t; + const data_t buf = reinterpret_cast(frame.data()); + const mkvmuxer::uint64 len = frame.length(); + + return segment->AddMetadata(buf, len, track_num, start_ns, duration_ns); +} diff --git a/sample_muxer_metadata.h b/sample_muxer_metadata.h new file mode 100644 index 0000000..3f9b87f --- /dev/null +++ b/sample_muxer_metadata.h @@ -0,0 +1,112 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+ +#ifndef SAMPLE_MUXER_METADATA_H_ // NOLINT +#define SAMPLE_MUXER_METADATA_H_ + +#include +#include + +#include "mkvmuxer.hpp" +#include "webvttparser.h" + +class SampleMuxerMetadata { + public: + enum Kind { + kSubtitles, + kCaptions, + kDescriptions, + kMetadata + }; + + SampleMuxerMetadata(); + + // Bind this metadata object to the muxer instance. + void Init(mkvmuxer::Segment* segment); + + // Parse the WebVTT file |filename| having the indicated |kind|, and + // create a corresponding track in the segment. Returns false on + // error. + bool Load(const char* filename, Kind kind); + + // Write any WebVTT cues whose time is less than or equal to |time_ns| as + // a metadata block in its corresponding track. If |time_ns| is + // negative, write all remaining cues. Returns false on error. + bool Write(mkvmuxer::int64 time_ns); + + private: + typedef libwebvtt::Cue cue_t; + + // Used to sort cues as they are loaded. + struct SortableCue { + bool operator>(mkvmuxer::int64 time_ns) const { + // Cue start time expressed in milliseconds + const mkvmuxer::int64 start_ms = cue.start_time.presentation(); + + // Cue start time expressed in nanoseconds (MKV time) + const mkvmuxer::int64 start_ns = start_ms * 1000000; + + return (start_ns > time_ns); + } + + bool operator<(const SortableCue& rhs) const { + if (cue.start_time < rhs.cue.start_time) + return true; + + if (cue.start_time > rhs.cue.start_time) + return false; + + return (track_num < rhs.track_num); + } + + // Write this cue as a metadata block to |segment|. Returns false on + // error. + bool Write(mkvmuxer::Segment* segment) const; + + mkvmuxer::uint64 track_num; + cue_t cue; + }; + + typedef std::multiset cues_set_t; + + // Add a metadata track to the segment having the indicated |kind|, + // returning the |track_num| that has been chosen for this track. + // Returns false on error. 
+ bool AddTrack(Kind kind, mkvmuxer::uint64* track_num); + + // Parse the WebVTT |file| having the indicated |kind| and + // |track_num|, adding each parsed cue to cues set. Returns false + // on error. + bool Parse(const char* file, Kind kind, mkvmuxer::uint64 track_num); + + // Converts a WebVTT cue to a Matroska metadata block. + static void MakeFrame(const cue_t& cue, std::string* frame); + + // Populate the cue identifier part of the metadata block. + static void WriteCueIdentifier(const std::string& identifier, + std::string* frame); + + // Populate the cue settings part of the metadata block. + static void WriteCueSettings(const cue_t::settings_t& settings, + std::string* frame); + + // Populate the payload part of the metadata block. + static void WriteCuePayload(const cue_t::payload_t& payload, + std::string* frame); + + mkvmuxer::Segment* segment_; + + // Set of cues ordered by time and then by track number. + cues_set_t cues_set_; + + // Disable copy ctor and copy assign. + SampleMuxerMetadata(const SampleMuxerMetadata&); + SampleMuxerMetadata& operator=(const SampleMuxerMetadata&); +}; + +#endif // SAMPLE_MUXER_METADATA_H_ // NOLINT