| /* |
| * Copyright (C) 2011 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Google Inc. nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "core/html/track/vtt/VTTTokenizer.h" |
| |
| #include "core/xml/parser/MarkupTokenizerInlines.h" |
| #include "wtf/text/CharacterNames.h" |
| #include "wtf/text/StringBuilder.h" |
| |
| namespace blink { |
| |
// Opens the handler for |stateName| inside the tokenizer's switch statement.
// The case label serves the initial switch dispatch; the identically named
// goto label is the jump target used by WEBVTT_ADVANCE_TO.
#define WEBVTT_BEGIN_STATE(stateName) \
  case stateName:                     \
  stateName:

// Consumes the current input character, loads the following one into |cc|,
// records |stateName| in |state| and jumps directly to that state's handler.
// Expects |state|, |cc|, |m_input| and |m_inputStreamPreprocessor| to be in
// scope at the expansion site.
#define WEBVTT_ADVANCE_TO(stateName)                     \
  do {                                                   \
    DCHECK(!m_input.isEmpty());                          \
    state = stateName;                                   \
    m_inputStreamPreprocessor.advance(m_input);          \
    cc = m_inputStreamPreprocessor.nextInputCharacter(); \
    goto stateName;                                      \
  } while (false)
| |
| template <unsigned charactersCount> |
| ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, |
| const char (&characters)[charactersCount]) { |
| return WTF::equal(s, reinterpret_cast<const LChar*>(characters), |
| charactersCount - 1); |
| } |
| |
| static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) { |
| if (!classes.isEmpty()) |
| classes.append(' '); |
| classes.append(newClass); |
| } |
| |
| inline bool emitToken(VTTToken& resultToken, const VTTToken& token) { |
| resultToken = token; |
| return true; |
| } |
| |
| inline bool advanceAndEmitToken(SegmentedString& source, |
| VTTToken& resultToken, |
| const VTTToken& token) { |
| source.advanceAndUpdateLineNumber(); |
| return emitToken(resultToken, token); |
| } |
| |
| VTTTokenizer::VTTTokenizer(const String& input) |
| : m_input(input), m_inputStreamPreprocessor(this) { |
| // Append a EOF marker and close the input "stream". |
| DCHECK(!m_input.isClosed()); |
| m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); |
| m_input.close(); |
| } |
| |
| bool VTTTokenizer::nextToken(VTTToken& token) { |
| if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) |
| return false; |
| |
| UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
| if (cc == kEndOfFileMarker) { |
| m_inputStreamPreprocessor.advance(m_input); |
| return false; |
| } |
| |
| StringBuilder buffer; |
| StringBuilder result; |
| StringBuilder classes; |
| enum { |
| DataState, |
| EscapeState, |
| TagState, |
| StartTagState, |
| StartTagClassState, |
| StartTagAnnotationState, |
| EndTagState, |
| TimestampTagState, |
| } state = DataState; |
| |
| // 4.8.10.13.4 WebVTT cue text tokenizer |
| switch (state) { |
| WEBVTT_BEGIN_STATE(DataState) { |
| if (cc == '&') { |
| buffer.append(static_cast<LChar>(cc)); |
| WEBVTT_ADVANCE_TO(EscapeState); |
| } else if (cc == '<') { |
| if (result.isEmpty()) { |
| WEBVTT_ADVANCE_TO(TagState); |
| } else { |
| // We don't want to advance input or perform a state transition - just |
| // return a (new) token. (On the next call to nextToken we will see |
| // '<' again, but take the other branch in this if instead.) |
| return emitToken(token, VTTToken::StringToken(result.toString())); |
| } |
| } else if (cc == kEndOfFileMarker) { |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::StringToken(result.toString())); |
| } else { |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(DataState); |
| } |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(EscapeState) { |
| if (cc == ';') { |
| if (equalLiteral(buffer, "&")) { |
| result.append('&'); |
| } else if (equalLiteral(buffer, "<")) { |
| result.append('<'); |
| } else if (equalLiteral(buffer, ">")) { |
| result.append('>'); |
| } else if (equalLiteral(buffer, "&lrm")) { |
| result.append(leftToRightMarkCharacter); |
| } else if (equalLiteral(buffer, "&rlm")) { |
| result.append(rightToLeftMarkCharacter); |
| } else if (equalLiteral(buffer, " ")) { |
| result.append(noBreakSpaceCharacter); |
| } else { |
| buffer.append(static_cast<LChar>(cc)); |
| result.append(buffer); |
| } |
| buffer.clear(); |
| WEBVTT_ADVANCE_TO(DataState); |
| } else if (isASCIIAlphanumeric(cc)) { |
| buffer.append(static_cast<LChar>(cc)); |
| WEBVTT_ADVANCE_TO(EscapeState); |
| } else if (cc == '<') { |
| result.append(buffer); |
| return emitToken(token, VTTToken::StringToken(result.toString())); |
| } else if (cc == kEndOfFileMarker) { |
| result.append(buffer); |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::StringToken(result.toString())); |
| } else { |
| result.append(buffer); |
| buffer.clear(); |
| |
| if (cc == '&') { |
| buffer.append(static_cast<LChar>(cc)); |
| WEBVTT_ADVANCE_TO(EscapeState); |
| } |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(DataState); |
| } |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(TagState) { |
| if (isTokenizerWhitespace(cc)) { |
| DCHECK(result.isEmpty()); |
| WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| } else if (cc == '.') { |
| DCHECK(result.isEmpty()); |
| WEBVTT_ADVANCE_TO(StartTagClassState); |
| } else if (cc == '/') { |
| WEBVTT_ADVANCE_TO(EndTagState); |
| } else if (WTF::isASCIIDigit(cc)) { |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(TimestampTagState); |
| } else if (cc == '>' || cc == kEndOfFileMarker) { |
| DCHECK(result.isEmpty()); |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::StartTag(result.toString())); |
| } else { |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(StartTagState); |
| } |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(StartTagState) { |
| if (isTokenizerWhitespace(cc)) { |
| WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| } else if (cc == '.') { |
| WEBVTT_ADVANCE_TO(StartTagClassState); |
| } else if (cc == '>' || cc == kEndOfFileMarker) { |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::StartTag(result.toString())); |
| } else { |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(StartTagState); |
| } |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(StartTagClassState) { |
| if (isTokenizerWhitespace(cc)) { |
| addNewClass(classes, buffer); |
| buffer.clear(); |
| WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| } else if (cc == '.') { |
| addNewClass(classes, buffer); |
| buffer.clear(); |
| WEBVTT_ADVANCE_TO(StartTagClassState); |
| } else if (cc == '>' || cc == kEndOfFileMarker) { |
| addNewClass(classes, buffer); |
| buffer.clear(); |
| return advanceAndEmitToken( |
| m_input, token, |
| VTTToken::StartTag(result.toString(), classes.toAtomicString())); |
| } else { |
| buffer.append(cc); |
| WEBVTT_ADVANCE_TO(StartTagClassState); |
| } |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(StartTagAnnotationState) { |
| if (cc == '>' || cc == kEndOfFileMarker) { |
| return advanceAndEmitToken( |
| m_input, token, |
| VTTToken::StartTag(result.toString(), classes.toAtomicString(), |
| buffer.toAtomicString())); |
| } |
| buffer.append(cc); |
| WEBVTT_ADVANCE_TO(StartTagAnnotationState); |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(EndTagState) { |
| if (cc == '>' || cc == kEndOfFileMarker) |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::EndTag(result.toString())); |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(EndTagState); |
| } |
| END_STATE() |
| |
| WEBVTT_BEGIN_STATE(TimestampTagState) { |
| if (cc == '>' || cc == kEndOfFileMarker) |
| return advanceAndEmitToken(m_input, token, |
| VTTToken::TimestampTag(result.toString())); |
| result.append(cc); |
| WEBVTT_ADVANCE_TO(TimestampTagState); |
| } |
| END_STATE() |
| } |
| |
| NOTREACHED(); |
| return false; |
| } |
| |
| } // namespace blink |