vttdemux: add support for WebVTT chapters

Change-Id: If5e12ff7057ce4217907ef91d493e1bcd8a72656
This commit is contained in:
Matthew Heaney 2012-10-26 15:06:28 -07:00
parent 386928d8b8
commit c26db03d5a
3 changed files with 335 additions and 8 deletions

View File

@ -4526,6 +4526,18 @@ long long Chapters::Atom::GetStopTimecode() const
} }
long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const
{
return GetTime(pChapters, m_start_timecode);
}
long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const
{
return GetTime(pChapters, m_stop_timecode);
}
int Chapters::Atom::GetDisplayCount() const int Chapters::Atom::GetDisplayCount() const
{ {
return m_displays_count; return m_displays_count;
@ -4651,6 +4663,37 @@ long Chapters::Atom::Parse(
} }
long long Chapters::Atom::GetTime(
const Chapters* pChapters,
long long timecode)
{
if (pChapters == NULL)
return -1;
Segment* const pSegment = pChapters->m_pSegment;
if (pSegment == NULL) // weird
return -1;
const SegmentInfo* const pInfo = pSegment->GetInfo();
if (pInfo == NULL)
return -1;
const long long timecode_scale = pInfo->GetTimeCodeScale();
if (timecode_scale < 1) // weird
return -1;
if (timecode < 0)
return -1;
const long long result = timecode_scale * timecode;
return result;
}
long Chapters::Atom::ParseDisplay( long Chapters::Atom::ParseDisplay(
IMkvReader* pReader, IMkvReader* pReader,
long long pos, long long pos,

View File

@ -569,6 +569,8 @@ public:
public: public:
long long GetStartTimecode() const; long long GetStartTimecode() const;
long long GetStopTimecode() const; long long GetStopTimecode() const;
long long GetStartTime(const Chapters*) const;
long long GetStopTime(const Chapters*) const;
int GetDisplayCount() const; int GetDisplayCount() const;
const Display* GetDisplay(int index) const; const Display* GetDisplay(int index) const;
private: private:
@ -576,6 +578,7 @@ public:
void ShallowCopy(Atom&) const; void ShallowCopy(Atom&) const;
void Clear(); void Clear();
long Parse(IMkvReader*, long long pos, long long size); long Parse(IMkvReader*, long long pos, long long size);
static long long GetTime(const Chapters*, long long timecode);
long ParseDisplay(IMkvReader*, long long pos, long long size); long ParseDisplay(IMkvReader*, long long pos, long long size);
bool ExpandDisplaysArray(); bool ExpandDisplaysArray();

View File

@ -26,7 +26,12 @@ typedef std::auto_ptr<mkvparser::Segment> segment_ptr_t;
// WebVTT metadata tracks have a type (encoded in the CodecID for the track). // WebVTT metadata tracks have a type (encoded in the CodecID for the track).
// We use |type| to synthesize a filename for the out-of-band WebVTT |file|. // We use |type| to synthesize a filename for the out-of-band WebVTT |file|.
struct MetadataInfo { struct MetadataInfo {
enum Type { kSubtitles, kCaptions, kDescriptions, kMetadata } type; enum Type {
kSubtitles,
kCaptions,
kDescriptions,
kMetadata,
kChapters } type;
FILE* file; FILE* file;
}; };
@ -34,6 +39,10 @@ struct MetadataInfo {
// each track in the input file. // each track in the input file.
typedef std::map<long, MetadataInfo> metadata_map_t; // NOLINT typedef std::map<long, MetadataInfo> metadata_map_t; // NOLINT
// The distinguished key value we use to store the chapters
// information in the metadata map.
enum { kChaptersKey = 0 };
// The data from the original WebVTT Cue is stored as a WebM block. // The data from the original WebVTT Cue is stored as a WebM block.
// The FrameParser is used to parse the lines of text out from the // The FrameParser is used to parse the lines of text out from the
// block, in order to reconstruct the original WebVTT Cue. // block, in order to reconstruct the original WebVTT Cue.
@ -72,6 +81,44 @@ class FrameParser : public libwebvtt::LineReader {
FrameParser& operator=(const FrameParser&); FrameParser& operator=(const FrameParser&);
}; };
// The data from the original WebVTT Cue is stored as an MKV Chapters
// Atom element (the cue payload is stored as a Display sub-element).
// The ChapterAtomParser is used to parse the lines of text out from
// the String sub-element of the Display element (though it would be
// admittedly odd if there were more than one line).
class ChapterAtomParser : public libwebvtt::LineReader {
public:
explicit ChapterAtomParser(const mkvparser::Chapters::Display* display);
virtual ~ChapterAtomParser();
const mkvparser::Chapters::Display* const display_;
protected:
// Read the next character from the character stream (the title
// member of the atom's display). We increment the stream pointer
// |str_| as each character from the stream is consumed.
virtual int GetChar(char* c);
// End-of-line handling requires that we put a character back into
// the stream. Here we need only decrement the stream pointer |str_|
// to unconsume the character.
virtual void UngetChar(char c);
// The current position in the character stream (the title of the
// atom's display).
const char* str_;
// The position of the end of the character stream. When the current
// position |str_| equals the end position |str_end_|, the entire
// stream (title of the display) has been consumed and end-of-stream
// is indicated.
const char* str_end_;
private:
ChapterAtomParser(const ChapterAtomParser&);
ChapterAtomParser& operator=(const ChapterAtomParser&);
};
// Parse the EBML header of the WebM input file, to determine whether we // Parse the EBML header of the WebM input file, to determine whether we
// actually have a WebM file. Returns false if this is not a WebM file. // actually have a WebM file. Returns false if this is not a WebM file.
bool ParseHeader(mkvparser::IMkvReader* reader, mkvpos_t* pos); bool ParseHeader(mkvparser::IMkvReader* reader, mkvpos_t* pos);
@ -83,8 +130,37 @@ bool ParseSegment(
mkvpos_t pos, mkvpos_t pos,
segment_ptr_t* segment); segment_ptr_t* segment);
// Iterate over the tracks of the input file and cache information about // If |segment| has a Chapters element (in which case, there will be a
// each metadata track. // corresponding entry in |metadata_map|), convert the MKV chapters to
// WebVTT chapter cues and write them to the output file. Returns
// false on error.
bool WriteChaptersFile(
const metadata_map_t& metadata_map,
const mkvparser::Segment* segment);
// Convert an MKV Chapters Atom to a WebVTT cue and write it to the
// output |file|. Returns false on error.
bool WriteChaptersCue(
FILE* file,
const mkvparser::Chapters* chapters,
const mkvparser::Chapters::Atom* atom,
const mkvparser::Chapters::Display* display);
// Use the timecodes from the chapters |atom| to write just the
// timings line of the WebVTT cue. Returns false on error.
bool WriteChaptersCueTimings(
FILE* file,
const mkvparser::Chapters* chapters,
const mkvparser::Chapters::Atom* atom);
// Parse the String sub-element of the |display| and write the payload
// of the WebVTT cue. Returns false on error.
bool WriteChaptersCuePayload(
FILE* file,
const mkvparser::Chapters::Display* display);
// Iterate over the tracks of the input file (and any chapters
// element) and cache information about each metadata track.
void BuildMap(const mkvparser::Segment* segment, metadata_map_t* metadata_map); void BuildMap(const mkvparser::Segment* segment, metadata_map_t* metadata_map);
// For each track listed in the cache, synthesize its output filename // For each track listed in the cache, synthesize its output filename
@ -184,8 +260,8 @@ int main(int argc, const char* argv[]) {
BuildMap(segment_ptr.get(), &metadata_map); BuildMap(segment_ptr.get(), &metadata_map);
if (metadata_map.empty()) { if (metadata_map.empty()) {
printf("no metadata tracks found\n"); printf("no WebVTT metadata found\n");
return EXIT_FAILURE; // TODO(matthewjheaney): correct result? return EXIT_FAILURE;
} }
if (!OpenFiles(&metadata_map, filename)) { if (!OpenFiles(&metadata_map, filename)) {
@ -245,6 +321,32 @@ void FrameParser::UngetChar(char /* c */ ) {
--pos_; --pos_;
} }
ChapterAtomParser::ChapterAtomParser(
const mkvparser::Chapters::Display* display)
: display_(display) {
str_ = display->GetString();
const size_t len = strlen(str_);
str_end_ = str_ + len;
}
ChapterAtomParser::~ChapterAtomParser() {
}
int ChapterAtomParser::GetChar(char* c) {
if (str_ >= str_end_) // end-of-stream
return 1; // per the semantics of libwebvtt::Reader::GetChar
*c = *str_++; // consume this character in the stream
return 0;
}
void ChapterAtomParser::UngetChar(char /* c */ ) {
// All we need to do here is decrement the position in the stream.
// The next time GetChar is called the same character will be
// re-read from the input file.
--str_;
}
} // namespace vttdemux } // namespace vttdemux
bool vttdemux::ParseHeader( bool vttdemux::ParseHeader(
@ -297,6 +399,17 @@ bool vttdemux::ParseSegment(
void vttdemux::BuildMap( void vttdemux::BuildMap(
const mkvparser::Segment* segment, const mkvparser::Segment* segment,
metadata_map_t* map_ptr) { metadata_map_t* map_ptr) {
metadata_map_t& m = *map_ptr;
m.clear();
if (segment->GetChapters()) {
MetadataInfo info;
info.file = NULL;
info.type = MetadataInfo::kChapters;
m[kChaptersKey] = info;
}
const mkvparser::Tracks* const tt = segment->GetTracks(); const mkvparser::Tracks* const tt = segment->GetTracks();
if (tt == NULL) if (tt == NULL)
return; return;
@ -305,8 +418,6 @@ void vttdemux::BuildMap(
if (tc <= 0) if (tc <= 0)
return; return;
metadata_map_t& m = *map_ptr;
// Iterate over the tracks in the intput file. We determine whether // Iterate over the tracks in the intput file. We determine whether
// a track holds metadata by inspecting its CodecID. // a track holds metadata by inspecting its CodecID.
@ -316,6 +427,11 @@ void vttdemux::BuildMap(
if (t == NULL) // weird if (t == NULL) // weird
continue; continue;
const long tn = t->GetNumber(); // NOLINT
if (tn <= 0) // weird
continue;
const char* const codec_id = t->GetCodecId(); const char* const codec_id = t->GetCodecId();
if (codec_id == NULL) // weird if (codec_id == NULL) // weird
@ -336,7 +452,6 @@ void vttdemux::BuildMap(
continue; continue;
} }
const long tn = t->GetNumber(); // NOLINT
m[tn] = info; // create an entry in the cache for this track m[tn] = info; // create an entry in the cache for this track
} }
} }
@ -415,6 +530,10 @@ bool vttdemux::OpenFiles(metadata_map_t* metadata_map, const char* filename) {
name += "_METADATA"; name += "_METADATA";
break; break;
case MetadataInfo::kChapters:
name += "_CHAPTERS";
break;
default: default:
return false; return false;
} }
@ -476,6 +595,9 @@ bool vttdemux::WriteFiles(const metadata_map_t& m, mkvparser::Segment* s) {
InitializeFiles(m); InitializeFiles(m);
if (!WriteChaptersFile(m, s))
return false;
// Now iterate over the clusters, writing the WebVTT cue as we parse // Now iterate over the clusters, writing the WebVTT cue as we parse
// each metadata block. // each metadata block.
@ -512,6 +634,165 @@ bool vttdemux::InitializeFiles(const metadata_map_t& m) {
return true; return true;
} }
bool vttdemux::WriteChaptersFile(
const metadata_map_t& m,
const mkvparser::Segment* s) {
const metadata_map_t::const_iterator info_iter = m.find(kChaptersKey);
if (info_iter == m.end()) // no chapters, so nothing to do
return true;
const mkvparser::Chapters* const chapters = s->GetChapters();
if (chapters == NULL) // weird
return true;
const MetadataInfo& info = info_iter->second;
FILE* const file = info.file;
const int edition_count = chapters->GetEditionCount();
if (edition_count <= 0) // weird
return true; // nothing to do
if (edition_count > 1) {
// TODO(matthewjheaney): figure what to do here
printf("more than one chapter edition detected\n");
return false;
}
const mkvparser::Chapters::Edition* const edition = chapters->GetEdition(0);
const int atom_count = edition->GetAtomCount();
for (int idx = 0; idx < atom_count; ++idx) {
const mkvparser::Chapters::Atom* const atom = edition->GetAtom(idx);
const int display_count = atom->GetDisplayCount();
if (display_count <= 0)
continue;
if (display_count > 1) {
// TODO(matthewjheaney): handle case of multiple languages
printf("more than 1 display in atom detected\n");
return false;
}
const mkvparser::Chapters::Display* const display = atom->GetDisplay(0);
if (const char* language = display->GetLanguage()) {
if (strcmp(language, "eng") != 0) {
// TODO(matthewjheaney): handle case of multiple languages.
// We must create a separate webvtt file for each language.
// This isn't a simple problem (which is why we defer it for
// now), because there's nothing in the header that tells us
// what languages we have as cues. We must parse the displays
// of each atom to determine that.
// One solution is to make two passes over the input data.
// First parse the displays, creating an in-memory cache of
// all the chapter cues, sorted according to their language.
// After we have read all of the chapter atoms from the input
// file, we can then write separate output files for each
// language.
printf("only English-language chapter cues are supported\n");
return false;
}
}
if (!WriteChaptersCue(file, chapters, atom, display))
return false;
}
return true;
}
bool vttdemux::WriteChaptersCue(
FILE* f,
const mkvparser::Chapters* chapters,
const mkvparser::Chapters::Atom* atom,
const mkvparser::Chapters::Display* display) {
// We start a new cue by writing a cue separator (an empty line)
// into the stream.
if (fputc('\n', f) < 0)
return false;
// A WebVTT Cue comprises 3 things: a cue identifier, followed by
// the cue timings, followed by the payload of the cue. We write
// each part of the cue in sequence.
// TODO(matthewjheaney): write cue identifier
// if (!WriteChaptersCueIdentifier(f, atom))
// return false;
if (!WriteChaptersCueTimings(f, chapters, atom))
return false;
if (!WriteChaptersCuePayload(f, display))
return false;
return true;
}
bool vttdemux::WriteChaptersCueTimings(
FILE* f,
const mkvparser::Chapters* chapters,
const mkvparser::Chapters::Atom* atom) {
const mkvtime_t start_ns = atom->GetStartTime(chapters);
if (start_ns < 0)
return false;
const mkvtime_t stop_ns = atom->GetStopTime(chapters);
if (stop_ns < 0)
return false;
if (!WriteCueTime(f, start_ns))
return false;
if (fputs(" --> ", f) < 0)
return false;
if (!WriteCueTime(f, stop_ns))
return false;
if (fputc('\n', f) < 0)
return false;
return true;
}
bool vttdemux::WriteChaptersCuePayload(
FILE* f,
const mkvparser::Chapters::Display* display) {
// Bind a Chapter parser object to the display, which allows us to
// extract each line of text from the title-part of the display.
ChapterAtomParser parser(display);
int count = 0; // count of lines of payload text written to output file
for (string line;;) {
const int e = parser.GetLine(&line);
if (e < 0) // error (only -- we allow EOS here)
return false;
if (line.empty()) // TODO(matthewjheaney): retain this check?
break;
if (fprintf(f, "%s\n", line.c_str()) < 0)
return false;
++count;
}
if (count <= 0) // WebVTT cue requires non-empty payload
return false;
return true;
}
bool vttdemux::ProcessCluster( bool vttdemux::ProcessCluster(
const metadata_map_t& m, const metadata_map_t& m,
const mkvparser::Cluster* c) { const mkvparser::Cluster* c) {