sample_muxer: added WebVTT support

Change-Id: If72d31ca4828adf39e4637003979a314e5dda98e
This commit is contained in:
Matthew Heaney
2012-08-14 16:40:33 -07:00
parent 8f0c3333d1
commit 7ef225de9f
7 changed files with 496 additions and 45 deletions

View File

@@ -8,6 +8,7 @@ OBJSSO := $(WEBMOBJS:.o=_so.o)
OBJECTS1 := sample.o
OBJECTS2 := sample_muxer.o
OBJECTS3 := dumpvtt.o vttreader.o webvttparser.o
OBJECTS4 := vttreader.o webvttparser.o sample_muxer_metadata.o
INCLUDES := -I.
EXES := samplemuxer sample dumpvtt
@@ -16,7 +17,7 @@ all: $(EXES)
sample: sample.o $(LIBWEBMA)
$(CXX) $^ -o $@
samplemuxer: sample_muxer.o $(LIBWEBMA)
samplemuxer: sample_muxer.o $(LIBWEBMA) $(OBJECTS4)
$(CXX) $^ -o $@
dumpvtt: $(OBJECTS3)
@@ -40,4 +41,4 @@ libwebm.so: $(OBJSSO)
$(CXX) -c $(CXXFLAGS) -fPIC $(INCLUDES) $< -o $@
clean:
$(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak
$(RM) -f $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) $(OBJECTS4) $(OBJSA) $(OBJSSO) $(LIBWEBMA) $(LIBWEBMSO) $(EXES) Makefile.bak

View File

@@ -1965,27 +1965,15 @@ int Segment::TestFrame(uint64 track_number,
if (frame_timecode < last_cluster_timecode) // should never happen
return -1; // error
// Handle the case when the frame we are testing has a timestamp
// equal to the cluster's timestamp. This can happen if some
// non-video keyframe (that is, a WebVTT cue or audio block) first
// creates the initial cluster (at t=0), and then we test a video
// keyframe. We don't want to create a new cluster just yet (see
// the predicate below, which specifies the creation of a new
// cluster when a video keyframe is detected); instead we want to
// force the frame to be written to the existing cluster.
if (frame_timecode == last_cluster_timecode)
return 0;
// If the frame has a timestamp significantly larger than the last
// cluster (in Matroska, cluster-relative timestamps are serialized
// using a 16-bit signed integer), then we cannot write this frame
// that cluster, and so we must create a new cluster.
// to that cluster, and so we must create a new cluster.
const int64 delta_timecode = frame_timecode - last_cluster_timecode;
if (delta_timecode > std::numeric_limits<int16>::max())
return 1;
return 2;
// We decide to create a new cluster when we have a video keyframe.
// This will flush queued (audio) frames, and write the keyframe
@@ -2095,24 +2083,31 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
bool Segment::DoNewClusterProcessing(uint64 track_number,
uint64 frame_timestamp_ns,
bool is_key) {
// Based on the characteristics of the current frame and current
// cluster, decide whether to create a new cluster.
const int result = TestFrame(track_number, frame_timestamp_ns, is_key);
if (result < 0) // error
return false;
for (;;) {
// Based on the characteristics of the current frame and current
// cluster, decide whether to create a new cluster.
const int result = TestFrame(track_number, frame_timestamp_ns, is_key);
if (result < 0) // error
return false;
// A non-zero result means create a new cluster.
if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
return false;
// A non-zero result means create a new cluster.
if (result > 0 && !MakeNewCluster(frame_timestamp_ns))
return false;
// Write queued (audio) frames.
const int frame_count = WriteFramesAll();
if (frame_count < 0) // error
return false;
// Write queued (audio) frames.
const int frame_count = WriteFramesAll();
if (frame_count < 0) // error
return false;
// Write the current frame to the current cluster (if TestFrame
// returns 0) or to a newly created cluster (TestFrame returns 1).
return true;
// Write the current frame to the current cluster (if TestFrame
// returns 0) or to a newly created cluster (TestFrame returns 1).
if (result <= 1)
return true;
// TestFrame returned 2, which means there was a large time
// difference between the cluster and the frame itself. Do the
// test again, comparing the frame to the new cluster.
}
}
bool Segment::CheckHeaderInfo() {

View File

@@ -834,6 +834,7 @@ class Segment {
// -1 = error: an out-of-order frame was detected
// 0 = do not create a new cluster, and write frame to the existing cluster
// 1 = create a new cluster, and write frame to that new cluster
// 2 = create a new cluster, and re-run test
int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const;
// Create a new cluster, using the earlier of the first enqueued

View File

@@ -387,6 +387,8 @@ uint64 WriteMetadataBlock(IMkvWriter* writer,
// We use a single byte for the track number of the block, which
// means the block header is exactly 4 bytes.
// TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement
const uint64 block_payload_size = 4 + length;
const int32 block_size = GetCodedUIntSize(block_payload_size);
const uint64 block_elem_size = 1 + block_size + block_payload_size;
@@ -437,7 +439,7 @@ uint64 WriteMetadataBlock(IMkvWriter* writer,
// Write Duration element
if (WriteID(writer, kMkvDuration)) // 1-byte ID size
if (WriteID(writer, kMkvBlockDuration)) // 1-byte ID size
return 0;
if (WriteUInt(writer, duration_payload_size))

View File

@@ -9,6 +9,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <list>
#include <string>
// libwebm parser includes
#include "mkvreader.hpp"
@@ -19,6 +21,10 @@
#include "mkvwriter.hpp"
#include "mkvmuxerutil.hpp"
#include "sample_muxer_metadata.h"
using mkvmuxer::uint64;
namespace {
void Usage() {
@@ -47,13 +53,87 @@ void Usage() {
printf("\n");
printf("Cues options:\n");
printf(" -output_cues_block_number <int> >0 outputs cue block number\n");
printf("\n");
printf("Metadata options:\n");
printf(" -webvtt-subtitles <vttfile> "
"add WebVTT subtitles as metadata track\n");
printf(" -webvtt-captions <vttfile> "
"add WebVTT captions as metadata track\n");
printf(" -webvtt-descriptions <vttfile> "
"add WebVTT descriptions as metadata track\n");
printf(" -webvtt-metadata <vttfile> "
"add WebVTT subtitles as metadata track\n");
}
} //end namespace
// Describes one WebVTT input file requested on the command line.
struct MetadataFile {
// Points at the argv entry holding the file name; the string is not copied.
const char* name;
// The kind of WebVTT track selected by the -webvtt-* option used.
SampleMuxerMetadata::Kind kind;
};
// The WebVTT inputs collected while parsing the command line, in the
// order in which they were appended.
typedef std::list<MetadataFile> metadata_files_t;
// Cache the WebVTT filenames specified as command-line args.
bool LoadMetadataFiles(
const metadata_files_t& files,
SampleMuxerMetadata* metadata) {
typedef metadata_files_t::const_iterator iter_t;
iter_t i = files.begin();
const iter_t j = files.end();
while (i != j) {
const metadata_files_t::value_type& v = *i++;
if (!metadata->Load(v.name, v.kind))
return false;
}
return true;
}
int ParseArgWebVTT(
char* argv[],
int* argv_index,
int argc_check,
metadata_files_t* metadata_files) {
int& i = *argv_index;
enum { kCount = 4 };
struct Arg { const char* name; SampleMuxerMetadata::Kind kind; };
const Arg args[kCount] = {
{ "-webvtt-subtitles", SampleMuxerMetadata::kSubtitles },
{ "-webvtt-captions", SampleMuxerMetadata::kCaptions },
{ "-webvtt-descriptions", SampleMuxerMetadata::kDescriptions },
{ "-webvtt-metadata", SampleMuxerMetadata::kMetadata }
};
for (int idx = 0; idx < kCount; ++idx) {
const Arg& arg = args[idx];
if (strcmp(arg.name, argv[i]) != 0) // no match
continue;
++i; // consume arg name here
if (i > argc_check) {
printf("missing value for %s\n", arg.name);
return -1; // error
}
MetadataFile f;
f.name = argv[i]; // arg value is consumed via caller's loop idx
f.kind = arg.kind;
metadata_files->push_back(f);
return 1; // successfully parsed WebVTT arg
}
return 0; // not a WebVTT arg
}
} // end namespace
int main(int argc, char* argv[]) {
using mkvmuxer::uint64;
char* input = NULL;
char* output = NULL;
@@ -78,6 +158,8 @@ int main(int argc, char* argv[]) {
uint64 display_height = 0;
uint64 stereo_mode = 0;
metadata_files_t metadata_files;
const int argc_check = argc - 1;
for (int i = 1; i < argc; ++i) {
char* end;
@@ -130,6 +212,9 @@ int main(int argc, char* argv[]) {
i < argc_check) {
output_cues_block_number =
strtol(argv[++i], &end, 10) == 0 ? false : true;
} else if (int e = ParseArgWebVTT(argv, &i, argc_check, &metadata_files)) {
if (e < 0)
return EXIT_FAILURE;
}
}
@@ -204,12 +289,13 @@ int main(int argc, char* argv[]) {
info->set_writing_app("sample_muxer");
// Set Tracks element attributes
enum { kVideoTrack = 1, kAudioTrack = 2 };
const mkvparser::Tracks* const parser_tracks = parser_segment->GetTracks();
unsigned long i = 0;
uint64 vid_track = 0; // no track added
uint64 aud_track = 0; // no track added
using mkvparser::Track;
while (i != parser_tracks->GetTracksCount()) {
int track_num = i++;
if (switch_tracks)
@@ -226,7 +312,7 @@ int main(int argc, char* argv[]) {
const long long track_type = parser_track->GetType();
if (track_type == kVideoTrack && output_video) {
if (track_type == Track::kVideo && output_video) {
// Get the video track from the parser
const mkvparser::VideoTrack* const pVideoTrack =
static_cast<const mkvparser::VideoTrack*>(parser_track);
@@ -264,7 +350,7 @@ int main(int argc, char* argv[]) {
if (rate > 0.0) {
video->set_frame_rate(rate);
}
} else if (track_type == kAudioTrack && output_audio) {
} else if (track_type == Track::kAudio && output_audio) {
// Get the audio track from the parser
const mkvparser::AudioTrack* const pAudioTrack =
static_cast<const mkvparser::AudioTrack*>(parser_track);
@@ -307,6 +393,17 @@ int main(int argc, char* argv[]) {
}
}
// We have created all the video and audio tracks. If any WebVTT
// files were specified as command-line args, then parse them and
// add a track to the output file corresponding to each metadata
// input file.
SampleMuxerMetadata metadata;
metadata.Init(&muxer_segment);
if (!LoadMetadataFiles(metadata_files, &metadata))
return EXIT_FAILURE;
// Set Cues element attributes
mkvmuxer::Cues* const cues = muxer_segment.GetCues();
cues->set_output_block_number(output_cues_block_number);
@@ -339,11 +436,16 @@ int main(int argc, char* argv[]) {
parser_tracks->GetTrackByNumber(
static_cast<unsigned long>(trackNum));
const long long track_type = parser_track->GetType();
const long long time_ns = block->GetTime(cluster);
if ((track_type == kAudioTrack && output_audio) ||
(track_type == kVideoTrack && output_video)) {
// Flush any metadata frames to the output file, before we write
// the current block.
if (!metadata.Write(time_ns))
return EXIT_FAILURE;
if ((track_type == Track::kAudio && output_audio) ||
(track_type == Track::kVideo && output_video)) {
const int frame_count = block->GetFrameCount();
const long long time_ns = block->GetTime(cluster);
const bool is_key = block->IsKey();
for (int i = 0; i < frame_count; ++i) {
@@ -361,7 +463,7 @@ int main(int argc, char* argv[]) {
return EXIT_FAILURE;
uint64 track_num = vid_track;
if (track_type == kAudioTrack)
if (track_type == Track::kAudio)
track_num = aud_track;
if (!muxer_segment.AddFrame(data,
@@ -387,6 +489,11 @@ int main(int argc, char* argv[]) {
cluster = parser_segment->GetNext(cluster);
}
// We have exhausted all video and audio frames in the input file.
// Flush any remaining metadata frames to the output file.
if (!metadata.Write(-1))
return EXIT_FAILURE;
muxer_segment.Finalize();
delete [] data;
@@ -397,6 +504,3 @@ int main(int argc, char* argv[]) {
return EXIT_SUCCESS;
}

236
sample_muxer_metadata.cc Normal file
View File

@@ -0,0 +1,236 @@
#include "sample_muxer_metadata.h"
#include <string>
#include "vttreader.h"
using std::string;
// Creates a metadata object that is not yet bound to any segment:
// segment_ is NULL until Init() assigns it.
SampleMuxerMetadata::SampleMuxerMetadata() : segment_(NULL) {
}
void SampleMuxerMetadata::Init(mkvmuxer::Segment* s) {
segment_ = s;
}
// Adds a track of the given |kind| to the segment and then parses the
// WebVTT |file| into cues belonging to that track.  Returns false if
// the track could not be added or the file could not be parsed.
bool SampleMuxerMetadata::Load(const char* file, Kind kind) {
  mkvmuxer::uint64 track_num;

  if (AddTrack(kind, &track_num))
    return Parse(file, kind, track_num);

  printf("Unable to add track for WebVTT file \"%s\"\n", file);
  return false;
}
// Writes pending cues to the segment in set order, removing each cue
// once it has been written.  With a non-negative |time_ns|, writing
// stops at the first cue that starts after |time_ns|; with a negative
// |time_ns| every remaining cue is written.  Returns false as soon as
// a cue cannot be written (that cue stays in the set).
bool SampleMuxerMetadata::Write(mkvmuxer::int64 time_ns) {
  const bool flush_everything = (time_ns < 0);

  // Cues are consumed strictly from the front of the ordered set, so
  // the cue under consideration is always the first element.
  while (!cues_set_.empty()) {
    const cues_set_t::iterator front = cues_set_.begin();
    const SortableCue& pending = *front;

    if (!flush_everything && pending > time_ns)
      return true;  // nothing else to do just yet

    if (!pending.Write(segment_)) {
      printf("\nCould not add metadata.\n");
      return false;  // error
    }

    cues_set_.erase(front);
  }

  return true;
}
// Adds a WebVTT track of the given |kind| to the segment.
//
// On success, stores the track number chosen by the muxer in
// |*track_num| and returns true.  On failure, |*track_num| is 0 and
// false is returned.
//
// |kind| is validated before the segment is touched, so an
// unrecognized kind no longer leaves a half-configured track (no type,
// no codec id) behind in the segment.
bool SampleMuxerMetadata::AddTrack(
    Kind kind,
    mkvmuxer::uint64* track_num) {
  *track_num = 0;

  // Track type values written to the track entry.
  // NOTE(review): these match the Matroska TrackType values for
  // "subtitle" (0x11) and "metadata" (0x21) -- confirm against the spec.
  enum {
    kSubtitleTrackType = 0x11,
    kMetadataTrackType = 0x21
  };

  int type;
  const char* codec_id;

  switch (kind) {
    case kSubtitles:
      type = kSubtitleTrackType;
      codec_id = "D_WEBVTT/SUBTITLES";
      break;

    case kCaptions:
      type = kSubtitleTrackType;
      codec_id = "D_WEBVTT/CAPTIONS";
      break;

    case kDescriptions:
      type = kMetadataTrackType;
      codec_id = "D_WEBVTT/DESCRIPTIONS";
      break;

    case kMetadata:
      type = kMetadataTrackType;
      codec_id = "D_WEBVTT/METADATA";
      break;

    default:
      return false;  // unknown kind: leave the segment untouched
  }

  // Track number value 0 means "let muxer choose track number"
  mkvmuxer::Track* const track = segment_->AddTrack(0);

  if (track == NULL)  // error
    return false;

  // Return the track number value chosen by the muxer
  *track_num = track->number();

  track->set_type(type);
  track->set_codec_id(codec_id);

  // TODO(matthewjheaney): set name and language

  return true;
}
bool SampleMuxerMetadata::Parse(
const char* file,
Kind /* kind */,
mkvmuxer::uint64 track_num) {
libwebvtt::VttReader r;
int e = r.Open(file);
if (e) {
printf("Unable to open WebVTT file: \"%s\"\n", file);
return false;
}
libwebvtt::Parser p(&r);
e = p.Init();
if (e < 0) { // error
printf("Error parsing WebVTT file: \"%s\"\n", file);
return false;
}
SortableCue cue;
cue.track_num = track_num;
libwebvtt::Time t;
t.hours = -1;
for (;;) {
cue_t& c = cue.cue;
e = p.Parse(&c);
if (e < 0) { // error
printf("Error parsing WebVTT file: \"%s\"\n", file);
return false;
}
if (e > 0) // EOF
return true;
if (c.start_time >= t) {
t = c.start_time;
} else {
printf("bad WebVTT cue timestamp (out-of-order)\n");
return false;
}
if (c.stop_time < c.start_time) {
printf("bad WebVTT cue timestamp (stop < start)\n");
return false;
}
cues_set_.insert(cue);
}
}
void SampleMuxerMetadata::MakeFrame(const cue_t& c, string* pf) {
pf->clear();
WriteCueIdentifier(c.identifier, pf);
WriteCueSettings(c.settings, pf);
WriteCuePayload(c.payload, pf);
}
void SampleMuxerMetadata::WriteCueIdentifier(
const string& identifier,
string* pf) {
pf->append(identifier);
pf->push_back('\x0A'); // LF
}
// Appends the cue settings to |*frame| as "name:value" pairs separated
// by single spaces, and terminates the line with a line feed.  When
// there are no settings only the line feed is appended.
void SampleMuxerMetadata::WriteCueSettings(
    const cue_t::settings_t& settings,
    string* frame) {
  typedef cue_t::settings_t::const_iterator setting_iter_t;

  const setting_iter_t first = settings.begin();
  const setting_iter_t last = settings.end();

  for (setting_iter_t it = first; it != last; ++it) {
    // Every setting except the first is preceded by a separator.
    if (it != first)
      frame->push_back(' ');

    const libwebvtt::Setting& setting = *it;

    frame->append(setting.name);
    frame->push_back(':');
    frame->append(setting.value);
  }

  frame->push_back('\x0A');  // LF
}
// Appends each payload line to |*frame|, terminating every line with a
// line feed.  An empty payload appends nothing.
void SampleMuxerMetadata::WriteCuePayload(
    const cue_t::payload_t& payload,
    string* frame) {
  typedef cue_t::payload_t::const_iterator line_iter_t;

  for (line_iter_t it = payload.begin(); it != payload.end(); ++it) {
    frame->append(*it);
    frame->push_back('\x0A');  // LF
  }
}
// Serializes this cue and adds it to |segment| as a metadata block on
// track |track_num|, stamped with the cue's start time and carrying
// its duration (stop time minus start time).  Returns whatever
// Segment::AddMetadata returns.
bool SampleMuxerMetadata::SortableCue::Write(
    mkvmuxer::Segment* segment) const {
  // Cue times are expressed in milliseconds, MKV times in nanoseconds.
  const mkvmuxer::int64 kNanosPerMilli = 1000000;

  const mkvmuxer::int64 begin_ms = cue.start_time.presentation();
  const mkvmuxer::int64 end_ms = cue.stop_time.presentation();

  const mkvmuxer::int64 begin_ns = begin_ms * kNanosPerMilli;
  const mkvmuxer::int64 end_ns = end_ms * kNanosPerMilli;

  // Metadata blocks always specify the block duration.
  const mkvmuxer::int64 span_ns = end_ns - begin_ns;

  string payload;
  MakeFrame(cue, &payload);

  const mkvmuxer::uint8* const bytes =
      reinterpret_cast<const mkvmuxer::uint8*>(payload.data());
  const mkvmuxer::uint64 byte_count = payload.length();

  return segment->AddMetadata(bytes, byte_count, track_num, begin_ns, span_ns);
}

112
sample_muxer_metadata.h Normal file
View File

@@ -0,0 +1,112 @@
// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
#ifndef SAMPLE_MUXER_METADATA_H_ // NOLINT
#define SAMPLE_MUXER_METADATA_H_
#include <list>
#include <set>
#include "mkvmuxer.hpp"
#include "webvttparser.h"
class SampleMuxerMetadata {
public:
enum Kind {
kSubtitles,
kCaptions,
kDescriptions,
kMetadata
};
SampleMuxerMetadata();
// Bind this metadata object to the muxer instance.
void Init(mkvmuxer::Segment* segment);
// Parse the WebVTT file |filename| having the indicated |kind|, and
// create a corresponding track in the segment. Returns false on
// error.
bool Load(const char* filename, Kind kind);
// Write any WebVTT cues whose time is less or equal to |time_ns| as
// a metadata block in its corresponding track. If |time_ns| is
// negative, write all remaining cues. Returns false on error.
bool Write(mkvmuxer::int64 time_ns);
private:
typedef libwebvtt::Cue cue_t;
// Used to sort cues as they are loaded.
struct SortableCue {
bool operator>(mkvmuxer::int64 time_ns) const {
// Cue start time expressed in milliseconds
const mkvmuxer::int64 start_ms = cue.start_time.presentation();
// Cue start time expressed in nanoseconds (MKV time)
const mkvmuxer::int64 start_ns = start_ms * 1000000;
return (start_ns > time_ns);
}
bool operator<(const SortableCue& rhs) const {
if (cue.start_time < rhs.cue.start_time)
return true;
if (cue.start_time > rhs.cue.start_time)
return false;
return (track_num < rhs.track_num);
}
// Write this cue as a metablock to |segment|. Returns false on
// error.
bool Write(mkvmuxer::Segment* segment) const;
mkvmuxer::uint64 track_num;
cue_t cue;
};
typedef std::multiset<SortableCue> cues_set_t;
// Add a metadata track to the segment having the indicated |kind|,
// returning the |track_num| that has been chosen for this track.
// Returns false on error.
bool AddTrack(Kind kind, mkvmuxer::uint64* track_num);
// Parse the WebVTT |file| having the indicated |kind| and
// |track_num|, adding each parsed cue to cues set. Returns false
// on error.
bool Parse(const char* file, Kind kind, mkvmuxer::uint64 track_num);
// Converts a WebVTT cue to a Matroska metadata block.
static void MakeFrame(const cue_t& cue, std::string* frame);
// Populate the cue identifier part of the metadata block.
static void WriteCueIdentifier(const std::string& identifier,
std::string* frame);
// Populate the cue settings part of the metadata block.
static void WriteCueSettings(const cue_t::settings_t& settings,
std::string* frame);
// Populate the payload part of the metadata block.
static void WriteCuePayload(const cue_t::payload_t& payload,
std::string* frame);
mkvmuxer::Segment* segment_;
// Set of cues ordered by time and then by track number.
cues_set_t cues_set_;
// Disable copy ctor and copy assign.
SampleMuxerMetadata(const SampleMuxerMetadata&);
SampleMuxerMetadata& operator=(const SampleMuxerMetadata&);
};
#endif // SAMPLE_MUXER_METADATA_H_ // NOLINT