| /* |
| Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. |
| |
| This library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Library General Public |
| License as published by the Free Software Foundation; either |
| version 2 of the License, or (at your option) any later version. |
| |
| This library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Library General Public License for more details. |
| |
| You should have received a copy of the GNU Library General Public License |
| along with this library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| Boston, MA 02110-1301, USA. |
| */ |
| |
| #ifndef SegmentedString_h |
| #define SegmentedString_h |
| |
| #include "platform/PlatformExport.h" |
| #include "wtf/Allocator.h" |
| #include "wtf/Deque.h" |
| #include "wtf/text/StringBuilder.h" |
| #include "wtf/text/TextPosition.h" |
| #include "wtf/text/WTFString.h" |
| |
| namespace blink { |
| |
| class SegmentedString; |
| |
| class PLATFORM_EXPORT SegmentedSubstring { |
| DISALLOW_NEW_EXCEPT_PLACEMENT_NEW(); |
| |
| public: |
| SegmentedSubstring() |
| : m_length(0), m_doNotExcludeLineNumbers(true), m_is8Bit(false) { |
| m_data.string16Ptr = 0; |
| } |
| |
| SegmentedSubstring(const String& str) |
| : m_length(str.length()), m_doNotExcludeLineNumbers(true), m_string(str) { |
| if (m_length) { |
| if (m_string.is8Bit()) { |
| m_is8Bit = true; |
| m_data.string8Ptr = m_string.characters8(); |
| } else { |
| m_is8Bit = false; |
| m_data.string16Ptr = m_string.characters16(); |
| } |
| } else { |
| m_is8Bit = false; |
| m_data.string8Ptr = nullptr; |
| } |
| } |
| |
| void clear() { |
| m_length = 0; |
| m_data.string16Ptr = nullptr; |
| m_is8Bit = false; |
| } |
| |
| bool is8Bit() { return m_is8Bit; } |
| |
| bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } |
| bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } |
| |
| void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } |
| |
| int numberOfCharactersConsumed() const { |
| return m_string.length() - m_length; |
| } |
| |
| void appendTo(StringBuilder& builder) const { |
| int offset = m_string.length() - m_length; |
| |
| if (!offset) { |
| if (m_length) |
| builder.append(m_string); |
| } else { |
| builder.append(m_string.substring(offset, m_length)); |
| } |
| } |
| |
| bool pushIfPossible(UChar c) { |
| if (!m_length) |
| return false; |
| |
| if (m_is8Bit) { |
| if (m_data.string8Ptr == m_string.characters8()) |
| return false; |
| |
| if (*(m_data.string8Ptr - 1) != c) |
| return false; |
| |
| --m_data.string8Ptr; |
| ++m_length; |
| return true; |
| } |
| |
| if (m_data.string16Ptr == m_string.characters16()) |
| return false; |
| |
| if (*(m_data.string16Ptr - 1) != c) |
| return false; |
| |
| --m_data.string16Ptr; |
| ++m_length; |
| return true; |
| } |
| |
| UChar getCurrentChar8() { return *m_data.string8Ptr; } |
| |
| UChar getCurrentChar16() { |
| return m_data.string16Ptr ? *m_data.string16Ptr : 0; |
| } |
| |
| UChar incrementAndGetCurrentChar8() { |
| ASSERT(m_data.string8Ptr); |
| return *++m_data.string8Ptr; |
| } |
| |
| UChar incrementAndGetCurrentChar16() { |
| ASSERT(m_data.string16Ptr); |
| return *++m_data.string16Ptr; |
| } |
| |
| String currentSubString(unsigned length) { |
| int offset = m_string.length() - m_length; |
| return m_string.substring(offset, length); |
| } |
| |
| ALWAYS_INLINE UChar getCurrentChar() { |
| ASSERT(m_length); |
| if (is8Bit()) |
| return getCurrentChar8(); |
| return getCurrentChar16(); |
| } |
| |
| ALWAYS_INLINE UChar incrementAndGetCurrentChar() { |
| ASSERT(m_length); |
| if (is8Bit()) |
| return incrementAndGetCurrentChar8(); |
| return incrementAndGetCurrentChar16(); |
| } |
| |
| ALWAYS_INLINE bool haveOneCharacterLeft() const { return m_length == 1; } |
| |
| ALWAYS_INLINE void decrementLength() { --m_length; } |
| |
| ALWAYS_INLINE int length() const { return m_length; } |
| |
| private: |
| union { |
| const LChar* string8Ptr; |
| const UChar* string16Ptr; |
| } m_data; |
| int m_length; |
| bool m_doNotExcludeLineNumbers; |
| bool m_is8Bit; |
| String m_string; |
| }; |
| |
| class PLATFORM_EXPORT SegmentedString { |
| DISALLOW_NEW(); |
| |
| public: |
| SegmentedString() |
| : m_currentChar(0), |
| m_numberOfCharactersConsumedPriorToCurrentString(0), |
| m_numberOfCharactersConsumedPriorToCurrentLine(0), |
| m_currentLine(0), |
| m_closed(false), |
| m_empty(true), |
| m_fastPathFlags(NoFastPath), |
| m_advanceFunc(&SegmentedString::advanceEmpty), |
| m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) {} |
| |
| SegmentedString(const String& str) |
| : m_currentString(str), |
| m_currentChar(0), |
| m_numberOfCharactersConsumedPriorToCurrentString(0), |
| m_numberOfCharactersConsumedPriorToCurrentLine(0), |
| m_currentLine(0), |
| m_closed(false), |
| m_empty(!str.length()), |
| m_fastPathFlags(NoFastPath) { |
| if (m_currentString.length()) |
| m_currentChar = m_currentString.getCurrentChar(); |
| updateAdvanceFunctionPointers(); |
| } |
| |
| void clear(); |
| void close(); |
| |
| void append(const SegmentedString&); |
| enum class PrependType { |
| NewInput = 0, |
| Unconsume = 1, |
| }; |
| void prepend(const SegmentedString&, PrependType); |
| |
| bool excludeLineNumbers() const { |
| return m_currentString.excludeLineNumbers(); |
| } |
| void setExcludeLineNumbers(); |
| |
| void push(UChar); |
| |
| bool isEmpty() const { return m_empty; } |
| unsigned length() const; |
| |
| bool isClosed() const { return m_closed; } |
| |
| enum LookAheadResult { |
| DidNotMatch, |
| DidMatch, |
| NotEnoughCharacters, |
| }; |
| |
| LookAheadResult lookAhead(const String& string) { |
| return lookAheadInline(string, TextCaseSensitive); |
| } |
| LookAheadResult lookAheadIgnoringCase(const String& string) { |
| return lookAheadInline(string, TextCaseInsensitive); |
| } |
| |
| void advance() { |
| if (m_fastPathFlags & Use8BitAdvance) { |
| m_currentChar = m_currentString.incrementAndGetCurrentChar8(); |
| decrementAndCheckLength(); |
| return; |
| } |
| |
| (this->*m_advanceFunc)(); |
| } |
| |
| inline void advanceAndUpdateLineNumber() { |
| if (m_fastPathFlags & Use8BitAdvance) { |
| bool haveNewLine = |
| (m_currentChar == '\n') & |
| !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); |
| m_currentChar = m_currentString.incrementAndGetCurrentChar8(); |
| decrementAndCheckLength(); |
| |
| if (haveNewLine) { |
| ++m_currentLine; |
| m_numberOfCharactersConsumedPriorToCurrentLine = |
| m_numberOfCharactersConsumedPriorToCurrentString + |
| m_currentString.numberOfCharactersConsumed(); |
| } |
| |
| return; |
| } |
| |
| (this->*m_advanceAndUpdateLineNumberFunc)(); |
| } |
| |
| void advanceAndASSERT(UChar expectedCharacter) { |
| ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter); |
| advance(); |
| } |
| |
| void advanceAndASSERTIgnoringCase(UChar expectedCharacter) { |
| ASSERT_UNUSED(expectedCharacter, |
| WTF::Unicode::foldCase(currentChar()) == |
| WTF::Unicode::foldCase(expectedCharacter)); |
| advance(); |
| } |
| |
| void advancePastNonNewline() { |
| ASSERT(currentChar() != '\n'); |
| advance(); |
| } |
| |
| void advancePastNewlineAndUpdateLineNumber() { |
| ASSERT(currentChar() == '\n'); |
| if (m_currentString.length() > 1) { |
| int newLineFlag = m_currentString.doNotExcludeLineNumbers(); |
| m_currentLine += newLineFlag; |
| if (newLineFlag) |
| m_numberOfCharactersConsumedPriorToCurrentLine = |
| numberOfCharactersConsumed() + 1; |
| decrementAndCheckLength(); |
| m_currentChar = m_currentString.incrementAndGetCurrentChar(); |
| return; |
| } |
| advanceAndUpdateLineNumberSlowCase(); |
| } |
| |
| // Writes the consumed characters into consumedCharacters, which must |
| // have space for at least |count| characters. |
| void advance(unsigned count, UChar* consumedCharacters); |
| |
| int numberOfCharactersConsumed() const { |
| int numberOfPushedCharacters = 0; |
| return m_numberOfCharactersConsumedPriorToCurrentString + |
| m_currentString.numberOfCharactersConsumed() - |
| numberOfPushedCharacters; |
| } |
| |
| String toString() const; |
| |
| UChar currentChar() const { return m_currentChar; } |
| |
| // The method is moderately slow, comparing to currentLine method. |
| OrdinalNumber currentColumn() const; |
| OrdinalNumber currentLine() const; |
| // Sets value of line/column variables. Column is specified indirectly by a |
| // parameter columnAftreProlog which is a value of column that we should get |
| // after a prolog (first prologLength characters) has been consumed. |
| void setCurrentPosition(OrdinalNumber line, |
| OrdinalNumber columnAftreProlog, |
| int prologLength); |
| |
| private: |
| enum FastPathFlags { |
| NoFastPath = 0, |
| Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, |
| Use8BitAdvance = 1 << 1, |
| }; |
| |
| void append(const SegmentedSubstring&); |
| void prepend(const SegmentedSubstring&, PrependType); |
| |
| void advance8(); |
| void advance16(); |
| void advanceAndUpdateLineNumber8(); |
| void advanceAndUpdateLineNumber16(); |
| void advanceSlowCase(); |
| void advanceAndUpdateLineNumberSlowCase(); |
| void advanceEmpty(); |
| void advanceSubstring(); |
| |
| void updateSlowCaseFunctionPointers(); |
| |
| void decrementAndCheckLength() { |
| ASSERT(m_currentString.length() > 1); |
| m_currentString.decrementLength(); |
| if (m_currentString.haveOneCharacterLeft()) |
| updateSlowCaseFunctionPointers(); |
| } |
| |
| void updateAdvanceFunctionPointers() { |
| if (m_currentString.length() > 1) { |
| if (m_currentString.is8Bit()) { |
| m_advanceFunc = &SegmentedString::advance8; |
| m_fastPathFlags = Use8BitAdvance; |
| if (m_currentString.doNotExcludeLineNumbers()) { |
| m_advanceAndUpdateLineNumberFunc = |
| &SegmentedString::advanceAndUpdateLineNumber8; |
| m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; |
| } else { |
| m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; |
| } |
| return; |
| } |
| |
| m_advanceFunc = &SegmentedString::advance16; |
| m_fastPathFlags = NoFastPath; |
| if (m_currentString.doNotExcludeLineNumbers()) |
| m_advanceAndUpdateLineNumberFunc = |
| &SegmentedString::advanceAndUpdateLineNumber16; |
| else |
| m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; |
| return; |
| } |
| |
| if (!m_currentString.length() && !isComposite()) { |
| m_advanceFunc = &SegmentedString::advanceEmpty; |
| m_fastPathFlags = NoFastPath; |
| m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; |
| } |
| |
| updateSlowCaseFunctionPointers(); |
| } |
| |
| inline LookAheadResult lookAheadInline(const String& string, |
| TextCaseSensitivity caseSensitivity) { |
| if (string.length() <= static_cast<unsigned>(m_currentString.length())) { |
| String currentSubstring = |
| m_currentString.currentSubString(string.length()); |
| if (currentSubstring.startsWith(string, caseSensitivity)) |
| return DidMatch; |
| return DidNotMatch; |
| } |
| return lookAheadSlowCase(string, caseSensitivity); |
| } |
| |
| LookAheadResult lookAheadSlowCase(const String& string, |
| TextCaseSensitivity caseSensitivity) { |
| unsigned count = string.length(); |
| if (count > length()) |
| return NotEnoughCharacters; |
| UChar* consumedCharacters; |
| String consumedString = |
| String::createUninitialized(count, consumedCharacters); |
| advance(count, consumedCharacters); |
| LookAheadResult result = DidNotMatch; |
| if (consumedString.startsWith(string, caseSensitivity)) |
| result = DidMatch; |
| prepend(SegmentedString(consumedString), PrependType::Unconsume); |
| return result; |
| } |
| |
| bool isComposite() const { return !m_substrings.isEmpty(); } |
| |
| SegmentedSubstring m_currentString; |
| UChar m_currentChar; |
| int m_numberOfCharactersConsumedPriorToCurrentString; |
| int m_numberOfCharactersConsumedPriorToCurrentLine; |
| int m_currentLine; |
| Deque<SegmentedSubstring> m_substrings; |
| bool m_closed; |
| bool m_empty; |
| unsigned char m_fastPathFlags; |
| void (SegmentedString::*m_advanceFunc)(); |
| void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); |
| }; |
| |
| } // namespace blink |
| |
| #endif |