// src/utils/vttparser.js
/*
* Source: https://github.com/mozilla/vtt.js/blob/master/dist/vtt.js#L1716
*/
import VTTCue from './vttcue';
// Minimal decoder factory used by the parser. The returned object exposes a
// single `decode(data)` method:
//   - falsy input (undefined, '', 0, ...) yields ''
//   - any other non-string input throws an Error
//   - strings are round-tripped through encodeURIComponent/decodeURIComponent
const StringDecoder = function StringDecoder () {
  const decode = function (data) {
    if (!data) {
      return '';
    }
    if (typeof data === 'string') {
      return decodeURIComponent(encodeURIComponent(data));
    }
    throw new Error('Error - expected string data.');
  };

  return { decode: decode };
};
// Parser constructor: captures the global `window`, starts the state machine
// in 'INITIAL' with an empty text buffer, and creates a decoder plus an empty
// region list for this instance.
function VTTParser () {
  Object.assign(this, {
    window: window,
    state: 'INITIAL',
    buffer: '',
    decoder: new StringDecoder(),
    regionList: []
  });
}
/**
 * Try to read a time stamp from the start of `input`.
 *
 * Accepted shapes (milliseconds are always exactly three digits):
 *   [hours]:[minutes]:[seconds].[milliseconds]
 *   [hours]:[minutes].[milliseconds]      (when the first field is above 59)
 *   [minutes]:[seconds].[milliseconds]
 *
 * @param {string} input - text that should begin with a time stamp
 * @returns {number|null} the time in seconds, or null when `input` does not
 *   start with a recognised time stamp
 */
function parseTimeStamp (input) {
  const match = input.match(/^(\d+):(\d{2})(:\d{2})?\.(\d{3})/);
  if (!match) {
    return null;
  }

  const [, first, second, third, millis] = match;
  let hours;
  let minutes;
  let seconds;

  if (third) {
    // Three colon-separated fields: hours, minutes, seconds.
    hours = first;
    minutes = second;
    seconds = third.replace(':', '');
  } else if (first > 59) {
    // Two fields whose first exceeds 59: it is read as hours.
    hours = first;
    minutes = second;
    seconds = 0;
  } else {
    // Two fields: minutes and seconds.
    hours = 0;
    minutes = first;
    seconds = second;
  }

  // `| 0` turns the captured digit strings into integers.
  return (hours | 0) * 3600 + (minutes | 0) * 60 + (seconds | 0) + (millis | 0) / 1000;
}
// A settings object holds key/value pairs and ignores everything but the
// first assignment to a specific key.
function Settings () {
  // Prototype-less dictionary, so only keys that were explicitly stored are
  // ever reported by `has`.
  this.values = Object.create(null);
}

Settings.prototype = {
  // Store `v` under `k` unless the key already holds a value or `v` is the
  // empty string. The guard uses `has` rather than the stored value's
  // truthiness so that a falsy first value (0, false) is not overwritten by a
  // later assignment.
  set: function (k, v) {
    if (!this.has(k) && v !== '') {
      this.values[k] = v;
    }
  },

  // Return the value for a key, or a default value.
  // If 'defaultKey' is passed then 'dflt' is assumed to be an object with
  // a number of possible default values as properties where 'defaultKey' is
  // the key of the property that will be chosen; otherwise it's assumed to be
  // a single value.
  get: function (k, dflt, defaultKey) {
    if (this.has(k)) {
      return this.values[k];
    }
    return defaultKey ? dflt[defaultKey] : dflt;
  },

  // Check whether we have a value for a key.
  has: function (k) {
    return k in this.values;
  },

  // Accept a setting if it is one of the given alternatives in `a`.
  alt: function (k, v, a) {
    if (a.indexOf(v) !== -1) {
      this.set(k, v);
    }
  },

  // Accept a setting if it is a valid (signed) integer; stored as a number.
  integer: function (k, v) {
    if (/^-?\d+$/.test(v)) {
      this.set(k, parseInt(v, 10));
    }
  },

  // Accept a setting if it is a valid percentage between 0 and 100; stored as
  // a number. Returns true when `v` is a valid percentage (even if an earlier
  // assignment to `k` wins), false otherwise.
  percent: function (k, v) {
    if (/^([\d]{1,3})(\.[\d]*)?%$/.test(v)) {
      const value = parseFloat(v);
      if (value >= 0 && value <= 100) {
        this.set(k, value);
        return true;
      }
    }
    return false;
  }
};
/**
 * Split `input` into groups separated by `groupDelim` and interpret each group
 * as a key/value pair separated by `keyValueDelim`.
 *
 * Groups that do not split into exactly two parts are ignored. When
 * `groupDelim` is omitted the whole input is treated as a single group.
 *
 * @param {string} input - text to split
 * @param {function(string, string)} callback - invoked as callback(key, value)
 *   for every well-formed pair, in input order
 * @param {RegExp|string} keyValueDelim - separator between key and value
 * @param {RegExp|string} [groupDelim] - separator between groups
 */
function parseOptions (input, callback, keyValueDelim, groupDelim) {
  const groups = groupDelim ? input.split(groupDelim) : [input];
  // Iterate the array elements only: `for...in` would also visit enumerable
  // properties inherited from Array.prototype and treat them as groups.
  for (const group of groups) {
    if (typeof group !== 'string') {
      continue;
    }
    const kv = group.split(keyValueDelim);
    if (kv.length !== 2) {
      continue;
    }
    callback(kv[0], kv[1]);
  }
}
// Probe cue: built once so the VTTCue implementation's own default values
// (align, line, position) can be inspected for feature detection.
const defaults = new VTTCue(0, 0, 0);
// 'middle' was changed to 'center' in the spec: https://github.com/w3c/webvtt/pull/244
// Safari doesn't yet support this change, but FF and Chrome do.
const usesLegacyMiddle = defaults.align === 'middle';
const center = usesLegacyMiddle ? 'middle' : 'center';
/**
 * Parse a cue timing line of the form
 *   <start> --> <end> [setting:value ...]
 * and write the result onto `cue`.
 *
 * `cue.startTime` is assigned as soon as the first time stamp is read, then
 * `cue.endTime`, then the settings-derived fields in this order: region,
 * vertical, line, lineAlign, snapToLines, size, align, position.
 *
 * @param {string} input - the timing line
 * @param {object} cue - cue object that receives the parsed values
 * @param {Array} regionList - entries with `id` and `region` properties,
 *   consulted for `region:<id>` settings
 * @throws {Error} when a time stamp is malformed or the '-->' separator is
 *   missing
 */
function parseCue (input, cue, regionList) {
  // Kept untouched so error messages can quote the complete line.
  const rawLine = input;
  // Unconsumed tail of the line; shrinks as tokens are read.
  let remaining = input;

  const dropLeadingWhitespace = () => {
    remaining = remaining.replace(/^\s+/, '');
  };

  // 4.1 WebVTT timestamp
  const takeTimeStamp = () => {
    const seconds = parseTimeStamp(remaining);
    if (seconds === null) {
      throw new Error('Malformed timestamp: ' + rawLine);
    }
    // Remove the time stamp that was just read.
    remaining = remaining.replace(/^[^\sa-zA-Z-]+/, '');
    return seconds;
  };

  // 4.4.2 WebVTT cue settings
  const applyCueSettings = (settingsText) => {
    const settings = new Settings();

    const onSetting = (key, value) => {
      if (key === 'region') {
        // Use the most recently parsed region carrying this id.
        for (let idx = regionList.length - 1; idx >= 0; idx--) {
          if (regionList[idx].id === value) {
            settings.set(key, regionList[idx].region);
            break;
          }
        }
      } else if (key === 'vertical') {
        settings.alt(key, value, ['rl', 'lr']);
      } else if (key === 'line') {
        const parts = value.split(',');
        const lineValue = parts[0];
        settings.integer(key, lineValue);
        // A percentage line value switches off snap-to-lines.
        if (settings.percent(key, lineValue)) {
          settings.set('snapToLines', false);
        }
        settings.alt(key, lineValue, ['auto']);
        if (parts.length === 2) {
          settings.alt('lineAlign', parts[1], ['start', center, 'end']);
        }
      } else if (key === 'position') {
        const parts = value.split(',');
        settings.percent(key, parts[0]);
        // NOTE: positionAlign is collected here but is not copied onto the
        // cue below.
        if (parts.length === 2) {
          settings.alt('positionAlign', parts[1], ['start', center, 'end', 'line-left', 'line-right', 'auto']);
        }
      } else if (key === 'size') {
        settings.percent(key, value);
      } else if (key === 'align') {
        settings.alt(key, value, ['start', center, 'end', 'left', 'right']);
      }
    };

    parseOptions(settingsText, onSetting, /:/, /\s/);

    // Copy the parsed values onto the cue, falling back to defaults.
    cue.region = settings.get('region', null);
    cue.vertical = settings.get('vertical', '');

    const requestedLine = settings.get('line', 'auto');
    // set numeric line number for Safari
    cue.line = (requestedLine === 'auto' && defaults.line === -1) ? -1 : requestedLine;

    cue.lineAlign = settings.get('lineAlign', 'start');
    cue.snapToLines = settings.get('snapToLines', true);
    cue.size = settings.get('size', 100);
    cue.align = settings.get('align', center);

    let position = settings.get('position', 'auto');
    if (position === 'auto' && defaults.position === 50) {
      // set numeric position for Safari, derived from the alignment the cue
      // reports after the assignment above
      switch (cue.align) {
      case 'start':
      case 'left':
        position = 0;
        break;
      case 'end':
      case 'right':
        position = 100;
        break;
      default:
        position = 50;
      }
    }
    cue.position = position;
  };

  // 4.1 WebVTT cue timings.
  dropLeadingWhitespace();
  cue.startTime = takeTimeStamp(); // (1) collect cue start time
  dropLeadingWhitespace();
  if (remaining.slice(0, 3) !== '-->') { // (3) next characters must match '-->'
    throw new Error('Malformed time stamp (time stamps must be separated by \'-->\'): ' +
      rawLine);
  }
  remaining = remaining.slice(3);
  dropLeadingWhitespace();
  cue.endTime = takeTimeStamp(); // (5) collect cue end time

  // 4.1 WebVTT cue settings list.
  dropLeadingWhitespace();
  applyCueSettings(remaining);
}
/**
 * Replace HTML-style line break tags with newline characters.
 *
 * Matches `<br>`, `<br/>` and `<br />` (any amount of whitespace before the
 * optional slash), case-insensitively.
 *
 * @param {string} input - text that may contain `<br>` tags
 * @returns {string} the text with every such tag replaced by '\n'
 */
function fixLineBreaks (input) {
  return input.replace(/<br\s*\/?>/gi, '\n');
}
VTTParser.prototype = {
  /**
   * Feed the next chunk of WebVTT text to the parser.
   *
   * The chunk is appended to the internal buffer and every complete line in
   * the buffer is run through the parsing state machine
   * (INITIAL -> HEADER -> ID -> CUE -> CUETEXT, plus NOTE / BADCUE /
   * BADWEBVTT). Each finished cue is passed to `this.oncue` when that
   * callback is set. Errors raised while parsing are caught here and move the
   * parser into the 'BADWEBVTT' or 'BADCUE' state.
   *
   * @param {string} [data] - text to append; when omitted only the buffered
   *   text is parsed (as done by flush())
   * @returns {VTTParser} this parser
   */
  parse: function (data) {
    const self = this;

    // If there is no data then we won't decode it, but will just try to parse
    // whatever is in buffer already. This may occur in circumstances, for
    // example when flush() is called.
    if (data) {
      // Try to decode the data that we received.
      self.buffer += self.decoder.decode(data, { stream: true });
    }

    // True when the buffer holds at least one newline-terminated line.
    function hasCompleteLine () {
      return /\r\n|\n/.test(self.buffer);
    }

    // Remove and return the first line of the buffer (without its line
    // terminator). <br> tags in the buffer are converted to newlines first.
    function collectNextLine () {
      const buffer = fixLineBreaks(self.buffer);
      let pos = 0;
      while (pos < buffer.length && buffer[pos] !== '\r' && buffer[pos] !== '\n') {
        ++pos;
      }
      const line = buffer.substr(0, pos);
      // Advance the buffer early in case we fail below.
      if (buffer[pos] === '\r') {
        ++pos;
      }
      if (buffer[pos] === '\n') {
        ++pos;
      }
      self.buffer = buffer.substr(pos);
      return line;
    }

    // 3.2 WebVTT metadata header syntax
    function parseHeader (input) {
      parseOptions(input, function (k) {
        switch (k) {
        case 'Region':
          // 3.3 WebVTT region metadata header syntax
          // Region headers are recognised but not parsed.
          break;
        }
      }, /:/);
    }

    // 5.1 WebVTT file parsing.
    try {
      let line;
      if (self.state === 'INITIAL') {
        // We can't start parsing until we have the first line.
        if (!hasCompleteLine()) {
          return self;
        }
        line = collectNextLine();
        // Allow an optional UTF-8 byte order mark before the signature.
        // https://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
        // It is written as an escape so it cannot be lost or overlooked as an
        // invisible character in the source.
        if (!/^\uFEFF?WEBVTT([ \t].*)?$/.test(line)) {
          throw new Error('Malformed WebVTT signature.');
        }
        self.state = 'HEADER';
      }

      // Set when CUETEXT meets a line containing '-->': that line ends the
      // current cue and must be processed again as the start of the next one.
      let pendingLine = false;
      while (self.buffer || pendingLine) {
        if (pendingLine) {
          // Re-process the held line right away, even if the buffer has no
          // further complete line yet; otherwise it would be dropped when
          // this call returns.
          pendingLine = false;
        } else {
          // We can't parse a line until we have the full line.
          if (!hasCompleteLine()) {
            return self;
          }
          line = collectNextLine();
        }

        switch (self.state) {
        case 'HEADER':
          // 13-18 - Allow a header (metadata) under the WEBVTT line.
          if (/:/.test(line)) {
            parseHeader(line);
          } else if (!line) {
            // An empty line terminates the header and starts the body (cues).
            self.state = 'ID';
          }
          continue;
        case 'NOTE':
          // Ignore NOTE blocks.
          if (!line) {
            self.state = 'ID';
          }
          continue;
        case 'ID':
          // Check for the start of NOTE blocks.
          if (/^NOTE($|[ \t])/.test(line)) {
            self.state = 'NOTE';
            continue;
          }
          // 19-29 - Allow any number of line terminators, then initialize new cue values.
          if (!line) {
            continue;
          }
          self.cue = new VTTCue(0, 0, '');
          self.state = 'CUE';
          // 30-39 - Check if this line contains an optional identifier or timing data.
          if (line.indexOf('-->') === -1) {
            self.cue.id = line;
            continue;
          }
          // Process line as start of a cue.
          /* falls through */
        case 'CUE':
          // 40 - Collect cue timings and settings.
          try {
            parseCue(line, self.cue, self.regionList);
          } catch (e) {
            // In case of an error ignore rest of the cue.
            self.cue = null;
            self.state = 'BADCUE';
            continue;
          }
          self.state = 'CUETEXT';
          continue;
        case 'CUETEXT': {
          const startsNextCue = line.indexOf('-->') !== -1;
          // 34 - If we have an empty line then report the cue.
          // 35 - If we have the special substring '-->' then report the cue,
          // but keep the line as it must be processed as a new cue.
          if (!line || startsNextCue) {
            pendingLine = startsNextCue;
            // We are done parsing this cue.
            if (self.oncue) {
              self.oncue(self.cue);
            }
            self.cue = null;
            self.state = 'ID';
            continue;
          }
          if (self.cue.text) {
            self.cue.text += '\n';
          }
          self.cue.text += line;
          continue;
        }
        case 'BADCUE':
          // 54-62 - Collect and discard the remaining cue.
          if (!line) {
            self.state = 'ID';
          }
          continue;
        }
      }
    } catch (e) {
      // If we are currently parsing a cue, report what we have.
      if (self.state === 'CUETEXT' && self.cue && self.oncue) {
        self.oncue(self.cue);
      }
      self.cue = null;
      // Enter BADWEBVTT state if header was not parsed correctly otherwise
      // another exception occurred so enter BADCUE state.
      self.state = self.state === 'INITIAL' ? 'BADWEBVTT' : 'BADCUE';
    }
    return self;
  },

  /**
   * Signal the end of input.
   *
   * Terminates a cue or header that is still open by appending a blank line
   * and parsing once more, then calls `this.onflush` when it is set.
   *
   * @returns {VTTParser} this parser
   * @throws {Error} when the parser is still in the 'INITIAL' state, i.e. the
   *   first line of the file was never received
   */
  flush: function () {
    const self = this;
    // Finish decoding the stream.
    self.buffer += self.decoder.decode();
    // Synthesize the end of the current cue or region.
    if (self.cue || self.state === 'HEADER') {
      self.buffer += '\n\n';
      self.parse();
    }
    // If we've flushed, parsed, and we're still on the INITIAL state then
    // that means we don't have enough of the stream to parse the first
    // line.
    if (self.state === 'INITIAL') {
      throw new Error('Malformed WebVTT signature.');
    }
    if (self.onflush) {
      self.onflush();
    }
    return self;
  }
};
export { fixLineBreaks };
export default VTTParser;