third_party/WebKit/Source/core/editing/iterators/SearchBuffer.cpp - chromium/src - Git at Google

 /*
  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
  * Copyright (C) 2005 Alexey Proskuryakov.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "core/editing/iterators/SearchBuffer.h"

 #include "core/dom/Document.h"
 #include "core/editing/iterators/CharacterIterator.h"
 #include "core/editing/iterators/SimplifiedBackwardsTextIterator.h"
 #include "platform/fonts/Character.h"
 #include "platform/text/TextBoundaries.h"
 #include "platform/text/TextBreakIteratorInternalICU.h"
 #include "platform/text/UnicodeUtilities.h"
 #include "wtf/text/CharacterNames.h"
 #include <unicode/usearch.h>

 namespace blink {

 static const size_t minimumSearchBufferSize = 8192;

 #if ENABLE(ASSERT)
 static bool searcherInUse;
 #endif

 static UStringSearch* createSearcher()
 {
     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
     // but it doesn't matter exactly what it is, since we don't perform any searches
     // without setting both the pattern and the text.
     UErrorCode status = U_ZERO_ERROR;
     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
     return searcher;
 }

 static UStringSearch* searcher()
 {
     static UStringSearch* searcher = createSearcher();
     return searcher;
 }

 static inline void lockSearcher()
 {
 #if ENABLE(ASSERT)
     ASSERT(!searcherInUse);
     searcherInUse = true;
 #endif
 }

 static inline void unlockSearcher()
 {
 #if ENABLE(ASSERT)
     ASSERT(searcherInUse);
     searcherInUse = false;
 #endif
 }

 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
     : m_options(options)
     , m_prefixLength(0)
     , m_numberOfCharactersJustAppended(0)
     , m_atBreak(true)
     , m_needsMoreContext(options & AtWordStarts)
     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
 {
     ASSERT(!target.isEmpty());
     target.appendTo(m_target);

     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
     // to add tailoring on top of the locale-specific tailoring as of this writing.
     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

     size_t targetLength = m_target.size();
     m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
     m_overlap = m_buffer.capacity() / 4;

     if ((m_options & AtWordStarts) && targetLength) {
         UChar32 targetFirstCharacter;
         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
         // Characters in the separator category never really occur at the beginning of a word,
         // so if the target begins with such a character, we just ignore the AtWordStart option.
         if (isSeparator(targetFirstCharacter)) {
             m_options &= ~AtWordStarts;
             m_needsMoreContext = false;
         }
     }

     // Grab the single global searcher.
     // If we ever have a reason to do more than once search buffer at once, we'll have
     // to move to multiple searchers.
     lockSearcher();

     UStringSearch* searcher = blink::searcher();
     UCollator* collator = usearch_getCollator(searcher);

     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
     if (ucol_getStrength(collator) != strength) {
         ucol_setStrength(collator, strength);
         usearch_reset(searcher);
     }

     UErrorCode status = U_ZERO_ERROR;
     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
     ASSERT(status == U_ZERO_ERROR);

     // The kana workaround requires a normalized copy of the target string.
     if (m_targetRequiresKanaWorkaround)
         normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
 }

 inline SearchBuffer::~SearchBuffer()
 {
     // Leave the static object pointing to valid strings (pattern=targer,
     // text=buffer). Otheriwse, usearch_reset() will results in 'use-after-free'
     // error.
     UErrorCode status = U_ZERO_ERROR;
     usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
     usearch_setText(blink::searcher(), &newlineCharacter, 1, &status);
     ASSERT(status == U_ZERO_ERROR);

     unlockSearcher();
 }

 template<typename CharType>
 inline void SearchBuffer::append(const CharType* characters, size_t length)
 {
     ASSERT(length);

     if (m_atBreak) {
         m_buffer.shrink(0);
         m_prefixLength = 0;
         m_atBreak = false;
     } else if (m_buffer.size() == m_buffer.capacity()) {
         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
         m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
         m_buffer.shrink(m_overlap);
     }

     size_t oldLength = m_buffer.size();
     size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
     ASSERT(usableLength);
     m_buffer.resize(oldLength + usableLength);
     UChar* destination = m_buffer.data() + oldLength;
     StringImpl::copyChars(destination, characters, usableLength);
     foldQuoteMarksAndSoftHyphens(destination, usableLength);
     m_numberOfCharactersJustAppended = usableLength;
 }

 inline bool SearchBuffer::needsMoreContext() const
 {
     return m_needsMoreContext;
 }

 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
 {
     ASSERT(m_needsMoreContext);
     ASSERT(m_prefixLength == m_buffer.size());

     if (!length)
         return;

     m_atBreak = false;

     size_t wordBoundaryContextStart = length;
     if (wordBoundaryContextStart) {
         U16_BACK_1(characters, 0, wordBoundaryContextStart);
         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
     }

     size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
     m_buffer.prepend(characters + length - usableLength, usableLength);
     m_prefixLength += usableLength;

     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
         m_needsMoreContext = false;
 }

 inline bool SearchBuffer::atBreak() const
 {
     return m_atBreak;
 }

 inline void SearchBuffer::reachedBreak()
 {
     m_atBreak = true;
 }

 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
 {
     // This function implements the kana workaround. If usearch treats
     // it as a match, but we do not want to, then it's a "bad match".
     if (!m_targetRequiresKanaWorkaround)
         return false;

     // Normalize into a match buffer. We reuse a single buffer rather than
     // creating a new one each time.
     normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);

     return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
 }

 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
 {
     ASSERT(m_options & AtWordStarts);

     if (!start)
         return true;

     int size = m_buffer.size();
     int offset = start;
     UChar32 firstCharacter;
     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);

     if (m_options & TreatMedialCapitalAsWordStart) {
         UChar32 previousCharacter;
         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);

         if (isSeparator(firstCharacter)) {
             // The start of a separator run is a word start (".org" in "webkit.org").
             if (!isSeparator(previousCharacter))
                 return true;
         } else if (isASCIIUpper(firstCharacter)) {
             // The start of an uppercase run is a word start ("Kit" in "WebKit").
             if (!isASCIIUpper(previousCharacter))
                 return true;
             // The last character of an uppercase run followed by a non-separator, non-digit
             // is a word start ("Request" in "XMLHTTPRequest").
             offset = start;
             U16_FWD_1(m_buffer.data(), offset, size);
             UChar32 nextCharacter = 0;
             if (offset < size)
                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
                 return true;
         } else if (isASCIIDigit(firstCharacter)) {
             // The start of a digit run is a word start ("2" in "WebKit2").
             if (!isASCIIDigit(previousCharacter))
                 return true;
         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
             // The start of a non-separator, non-uppercase, non-digit run is a word start,
             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
             return true;
         }
     }

     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
     // a word, so treat the position before any CJK character as a word start.
     if (Character::isCJKIdeographOrSymbol(firstCharacter))
         return true;

     size_t wordBreakSearchStart = start + length;
     while (wordBreakSearchStart > start)
         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
     if (wordBreakSearchStart != start)
         return false;
     if (m_options & WholeWord)
         return static_cast<int>(start + length) == findWordEndBoundary(m_buffer.data(), m_buffer.size(), wordBreakSearchStart);
     return true;
 }

 inline size_t SearchBuffer::search(size_t& start)
 {
     size_t size = m_buffer.size();
     if (m_atBreak) {
         if (!size)
             return 0;
     } else {
         if (size != m_buffer.capacity())
             return 0;
     }

     UStringSearch* searcher = blink::searcher();

     UErrorCode status = U_ZERO_ERROR;
     usearch_setText(searcher, m_buffer.data(), size, &status);
     ASSERT(status == U_ZERO_ERROR);

     usearch_setOffset(searcher, m_prefixLength, &status);
     ASSERT(status == U_ZERO_ERROR);

     int matchStart = usearch_next(searcher, &status);
     ASSERT(status == U_ZERO_ERROR);

 nextMatch:
     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
         ASSERT(matchStart == USEARCH_DONE);
         return 0;
     }

     // Matches that start in the overlap area are only tentative.
     // The same match may appear later, matching more characters,
     // possibly including a combining character that's not yet in the buffer.
     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
         size_t overlap = m_overlap;
         if (m_options & AtWordStarts) {
             // Ensure that there is sufficient context before matchStart the next time around for
             // determining if it is at a word boundary.
             int wordBoundaryContextStart = matchStart;
             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
             overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
         }
         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
         m_prefixLength -= std::min(m_prefixLength, size - overlap);
         m_buffer.shrink(overlap);
         return 0;
     }

     size_t matchedLength = usearch_getMatchedLength(searcher);
     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

     // If this match is "bad", move on to the next match.
     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
         matchStart = usearch_next(searcher, &status);
         ASSERT(status == U_ZERO_ERROR);
         goto nextMatch;
     }

     size_t newSize = size - (matchStart + 1);
     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
     m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
     m_buffer.shrink(newSize);

     start = size - matchStart;
     return matchedLength;
 }

 // Check if there's any unpaird surrogate code point.
 // Non-character code points are not checked.
 static bool isValidUTF16(const String& s)
 {
     if (s.is8Bit())
         return true;
     const UChar* ustr = s.characters16();
     size_t length = s.length();
     size_t position = 0;
     while (position < length) {
         UChar32 character;
         U16_NEXT(ustr, position, length, character);
         if (U_IS_SURROGATE(character))
             return false;
     }
     return true;
 }

 template <typename Strategy>
 static size_t findPlainTextInternal(CharacterIteratorAlgorithm<Strategy>& it, const String& target, FindOptions options, size_t& matchStart)
 {
     matchStart = 0;
     size_t matchLength = 0;

     if (!isValidUTF16(target))
         return 0;

     SearchBuffer buffer(target, options);

     if (buffer.needsMoreContext()) {
         for (SimplifiedBackwardsTextIteratorAlgorithm<Strategy> backwardsIterator(PositionTemplate<Strategy>::firstPositionInNode(it.ownerDocument()), PositionTemplate<Strategy>(it.currentContainer(), it.startOffset())); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
             BackwardsTextBuffer characters;
             backwardsIterator.copyTextTo(&characters);
             buffer.prependContext(characters.data(), characters.size());
             if (!buffer.needsMoreContext())
                 break;
         }
     }

     while (!it.atEnd()) {
         // TODO(xiaochengh): Should allow copying text to SearchBuffer directly
         ForwardsTextBuffer characters;
         it.copyTextTo(&characters);
         buffer.append(characters.data(), characters.size());
         it.advance(buffer.numberOfCharactersJustAppended());
 tryAgain:
         size_t matchStartOffset;
         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
             // Note that we found a match, and where we found it.
             size_t lastCharacterInBufferOffset = it.characterOffset();
             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
             matchStart = lastCharacterInBufferOffset - matchStartOffset;
             matchLength = newMatchLength;
             // If searching forward, stop on the first match.
             // If searching backward, don't stop, so we end up with the last match.
             if (!(options & Backwards))
                 break;
             goto tryAgain;
         }
         if (it.atBreak() && !buffer.atBreak()) {
             buffer.reachedBreak();
             goto tryAgain;
         }
     }

     return matchLength;
 }

 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersOpenShadowRoots | TextIteratorDoesNotBreakAtReplacedElement | TextIteratorCollapseTrailingSpace;

 template <typename Strategy>
 static EphemeralRangeTemplate<Strategy> findPlainTextAlgorithm(const EphemeralRangeTemplate<Strategy>& inputRange, const String& target, FindOptions options)
 {
     // CharacterIterator requires layoutObjects to be up-to-date.
     if (!inputRange.startPosition().inDocument())
         return EphemeralRangeTemplate<Strategy>();
     ASSERT(inputRange.startPosition().document() == inputRange.endPosition().document());

     // FIXME: Reduce the code duplication with above (but how?).
     size_t matchStart;
     size_t matchLength;
     {
         TextIteratorBehaviorFlags behavior = iteratorFlagsForFindPlainText;
         if (options & FindAPICall)
             behavior |= TextIteratorForWindowFind;
         CharacterIteratorAlgorithm<Strategy> findIterator(inputRange, behavior);
         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
         if (!matchLength)
             return EphemeralRangeTemplate<Strategy>(options & Backwards ? inputRange.startPosition() : inputRange.endPosition());
     }

     CharacterIteratorAlgorithm<Strategy> computeRangeIterator(inputRange, iteratorFlagsForFindPlainText);
     return computeRangeIterator.calculateCharacterSubrange(matchStart, matchLength);
 }

 EphemeralRange findPlainText(const EphemeralRange& inputRange, const String& target, FindOptions options)
 {
     return findPlainTextAlgorithm<EditingStrategy>(inputRange, target, options);
 }

 EphemeralRangeInFlatTree findPlainText(const EphemeralRangeInFlatTree& inputRange, const String& target, FindOptions options)
 {
     return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target, options);
 }

 } // namespace blink
	/*
	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
	* Copyright (C) 2005 Alexey Proskuryakov.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "core/editing/iterators/SearchBuffer.h"

	#include "core/dom/Document.h"
	#include "core/editing/iterators/CharacterIterator.h"
	#include "core/editing/iterators/SimplifiedBackwardsTextIterator.h"
	#include "platform/fonts/Character.h"
	#include "platform/text/TextBoundaries.h"
	#include "platform/text/TextBreakIteratorInternalICU.h"
	#include "platform/text/UnicodeUtilities.h"
	#include "wtf/text/CharacterNames.h"
	#include <unicode/usearch.h>

	namespace blink {

	static const size_t minimumSearchBufferSize = 8192;

	#if ENABLE(ASSERT)
	static bool searcherInUse;
	#endif

	static UStringSearch* createSearcher()
	{
	// Provide a non-empty pattern and non-empty text so usearch_open will not fail,
	// but it doesn't matter exactly what it is, since we don't perform any searches
	// without setting both the pattern and the text.
	UErrorCode status = U_ZERO_ERROR;
	String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
	UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
	ASSERT(status == U_ZERO_ERROR \|\| status == U_USING_FALLBACK_WARNING \|\| status == U_USING_DEFAULT_WARNING);
	return searcher;
	}

	static UStringSearch* searcher()
	{
	static UStringSearch* searcher = createSearcher();
	return searcher;
	}

	static inline void lockSearcher()
	{
	#if ENABLE(ASSERT)
	ASSERT(!searcherInUse);
	searcherInUse = true;
	#endif
	}

	static inline void unlockSearcher()
	{
	#if ENABLE(ASSERT)
	ASSERT(searcherInUse);
	searcherInUse = false;
	#endif
	}

	inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
	: m_options(options)
	, m_prefixLength(0)
	, m_numberOfCharactersJustAppended(0)
	, m_atBreak(true)
	, m_needsMoreContext(options & AtWordStarts)
	, m_targetRequiresKanaWorkaround(containsKanaLetters(target))
	{
	ASSERT(!target.isEmpty());
	target.appendTo(m_target);

	// FIXME: We'd like to tailor the searcher to fold quote marks for us instead
	// of doing it in a separate replacement pass here, but ICU doesn't offer a way
	// to add tailoring on top of the locale-specific tailoring as of this writing.
	foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

	size_t targetLength = m_target.size();
	m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
	m_overlap = m_buffer.capacity() / 4;

	if ((m_options & AtWordStarts) && targetLength) {
	UChar32 targetFirstCharacter;
	U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
	// Characters in the separator category never really occur at the beginning of a word,
	// so if the target begins with such a character, we just ignore the AtWordStart option.
	if (isSeparator(targetFirstCharacter)) {
	m_options &= ~AtWordStarts;
	m_needsMoreContext = false;
	}
	}

	// Grab the single global searcher.
	// If we ever have a reason to do more than once search buffer at once, we'll have
	// to move to multiple searchers.
	lockSearcher();

	UStringSearch* searcher = blink::searcher();
	UCollator* collator = usearch_getCollator(searcher);

	UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
	if (ucol_getStrength(collator) != strength) {
	ucol_setStrength(collator, strength);
	usearch_reset(searcher);
	}

	UErrorCode status = U_ZERO_ERROR;
	usearch_setPattern(searcher, m_target.data(), targetLength, &status);
	ASSERT(status == U_ZERO_ERROR);

	// The kana workaround requires a normalized copy of the target string.
	if (m_targetRequiresKanaWorkaround)
	normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
	}

	inline SearchBuffer::~SearchBuffer()
	{
	// Leave the static object pointing to valid strings (pattern=targer,
	// text=buffer). Otheriwse, usearch_reset() will results in 'use-after-free'
	// error.
	UErrorCode status = U_ZERO_ERROR;
	usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
	usearch_setText(blink::searcher(), &newlineCharacter, 1, &status);
	ASSERT(status == U_ZERO_ERROR);

	unlockSearcher();
	}

	template<typename CharType>
	inline void SearchBuffer::append(const CharType* characters, size_t length)
	{
	ASSERT(length);

	if (m_atBreak) {
	m_buffer.shrink(0);
	m_prefixLength = 0;
	m_atBreak = false;
	} else if (m_buffer.size() == m_buffer.capacity()) {
	memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
	m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
	m_buffer.shrink(m_overlap);
	}

	size_t oldLength = m_buffer.size();
	size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
	ASSERT(usableLength);
	m_buffer.resize(oldLength + usableLength);
	UChar* destination = m_buffer.data() + oldLength;
	StringImpl::copyChars(destination, characters, usableLength);
	foldQuoteMarksAndSoftHyphens(destination, usableLength);
	m_numberOfCharactersJustAppended = usableLength;
	}

	inline bool SearchBuffer::needsMoreContext() const
	{
	return m_needsMoreContext;
	}

	inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
	{
	ASSERT(m_needsMoreContext);
	ASSERT(m_prefixLength == m_buffer.size());

	if (!length)
	return;

	m_atBreak = false;

	size_t wordBoundaryContextStart = length;
	if (wordBoundaryContextStart) {
	U16_BACK_1(characters, 0, wordBoundaryContextStart);
	wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
	}

	size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
	m_buffer.prepend(characters + length - usableLength, usableLength);
	m_prefixLength += usableLength;

	if (wordBoundaryContextStart \|\| m_prefixLength == m_buffer.capacity())
	m_needsMoreContext = false;
	}

	inline bool SearchBuffer::atBreak() const
	{
	return m_atBreak;
	}

	inline void SearchBuffer::reachedBreak()
	{
	m_atBreak = true;
	}

	inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
	{
	// This function implements the kana workaround. If usearch treats
	// it as a match, but we do not want to, then it's a "bad match".
	if (!m_targetRequiresKanaWorkaround)
	return false;

	// Normalize into a match buffer. We reuse a single buffer rather than
	// creating a new one each time.
	normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);

	return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
	}

	inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
	{
	ASSERT(m_options & AtWordStarts);

	if (!start)
	return true;

	int size = m_buffer.size();
	int offset = start;
	UChar32 firstCharacter;
	U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);

	if (m_options & TreatMedialCapitalAsWordStart) {
	UChar32 previousCharacter;
	U16_PREV(m_buffer.data(), 0, offset, previousCharacter);

	if (isSeparator(firstCharacter)) {
	// The start of a separator run is a word start (".org" in "webkit.org").
	if (!isSeparator(previousCharacter))
	return true;
	} else if (isASCIIUpper(firstCharacter)) {
	// The start of an uppercase run is a word start ("Kit" in "WebKit").
	if (!isASCIIUpper(previousCharacter))
	return true;
	// The last character of an uppercase run followed by a non-separator, non-digit
	// is a word start ("Request" in "XMLHTTPRequest").
	offset = start;
	U16_FWD_1(m_buffer.data(), offset, size);
	UChar32 nextCharacter = 0;
	if (offset < size)
	U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
	if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
	return true;
	} else if (isASCIIDigit(firstCharacter)) {
	// The start of a digit run is a word start ("2" in "WebKit2").
	if (!isASCIIDigit(previousCharacter))
	return true;
	} else if (isSeparator(previousCharacter) \|\| isASCIIDigit(previousCharacter)) {
	// The start of a non-separator, non-uppercase, non-digit run is a word start,
	// except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
	return true;
	}
	}

	// Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
	// a word, so treat the position before any CJK character as a word start.
	if (Character::isCJKIdeographOrSymbol(firstCharacter))
	return true;

	size_t wordBreakSearchStart = start + length;
	while (wordBreakSearchStart > start)
	wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
	if (wordBreakSearchStart != start)
	return false;
	if (m_options & WholeWord)
	return static_cast<int>(start + length) == findWordEndBoundary(m_buffer.data(), m_buffer.size(), wordBreakSearchStart);
	return true;
	}

	inline size_t SearchBuffer::search(size_t& start)
	{
	size_t size = m_buffer.size();
	if (m_atBreak) {
	if (!size)
	return 0;
	} else {
	if (size != m_buffer.capacity())
	return 0;
	}

	UStringSearch* searcher = blink::searcher();

	UErrorCode status = U_ZERO_ERROR;
	usearch_setText(searcher, m_buffer.data(), size, &status);
	ASSERT(status == U_ZERO_ERROR);

	usearch_setOffset(searcher, m_prefixLength, &status);
	ASSERT(status == U_ZERO_ERROR);

	int matchStart = usearch_next(searcher, &status);
	ASSERT(status == U_ZERO_ERROR);

	nextMatch:
	if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
	ASSERT(matchStart == USEARCH_DONE);
	return 0;
	}

	// Matches that start in the overlap area are only tentative.
	// The same match may appear later, matching more characters,
	// possibly including a combining character that's not yet in the buffer.
	if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
	size_t overlap = m_overlap;
	if (m_options & AtWordStarts) {
	// Ensure that there is sufficient context before matchStart the next time around for
	// determining if it is at a word boundary.
	int wordBoundaryContextStart = matchStart;
	U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
	wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
	overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
	}
	memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
	m_prefixLength -= std::min(m_prefixLength, size - overlap);
	m_buffer.shrink(overlap);
	return 0;
	}

	size_t matchedLength = usearch_getMatchedLength(searcher);
	ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

	// If this match is "bad", move on to the next match.
	if (isBadMatch(m_buffer.data() + matchStart, matchedLength) \|\| ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
	matchStart = usearch_next(searcher, &status);
	ASSERT(status == U_ZERO_ERROR);
	goto nextMatch;
	}

	size_t newSize = size - (matchStart + 1);
	memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
	m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
	m_buffer.shrink(newSize);

	start = size - matchStart;
	return matchedLength;
	}

	// Check if there's any unpaird surrogate code point.
	// Non-character code points are not checked.
	static bool isValidUTF16(const String& s)
	{
	if (s.is8Bit())
	return true;
	const UChar* ustr = s.characters16();
	size_t length = s.length();
	size_t position = 0;
	while (position < length) {
	UChar32 character;
	U16_NEXT(ustr, position, length, character);
	if (U_IS_SURROGATE(character))
	return false;
	}
	return true;
	}

	template <typename Strategy>
	static size_t findPlainTextInternal(CharacterIteratorAlgorithm<Strategy>& it, const String& target, FindOptions options, size_t& matchStart)
	{
	matchStart = 0;
	size_t matchLength = 0;

	if (!isValidUTF16(target))
	return 0;

	SearchBuffer buffer(target, options);

	if (buffer.needsMoreContext()) {
	for (SimplifiedBackwardsTextIteratorAlgorithm<Strategy> backwardsIterator(PositionTemplate<Strategy>::firstPositionInNode(it.ownerDocument()), PositionTemplate<Strategy>(it.currentContainer(), it.startOffset())); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
	BackwardsTextBuffer characters;
	backwardsIterator.copyTextTo(&characters);
	buffer.prependContext(characters.data(), characters.size());
	if (!buffer.needsMoreContext())
	break;
	}
	}

	while (!it.atEnd()) {
	// TODO(xiaochengh): Should allow copying text to SearchBuffer directly
	ForwardsTextBuffer characters;
	it.copyTextTo(&characters);
	buffer.append(characters.data(), characters.size());
	it.advance(buffer.numberOfCharactersJustAppended());
	tryAgain:
	size_t matchStartOffset;
	if (size_t newMatchLength = buffer.search(matchStartOffset)) {
	// Note that we found a match, and where we found it.
	size_t lastCharacterInBufferOffset = it.characterOffset();
	ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
	matchStart = lastCharacterInBufferOffset - matchStartOffset;
	matchLength = newMatchLength;
	// If searching forward, stop on the first match.
	// If searching backward, don't stop, so we end up with the last match.
	if (!(options & Backwards))
	break;
	goto tryAgain;
	}
	if (it.atBreak() && !buffer.atBreak()) {
	buffer.reachedBreak();
	goto tryAgain;
	}
	}

	return matchLength;
	}

	static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls \| TextIteratorEntersOpenShadowRoots \| TextIteratorDoesNotBreakAtReplacedElement \| TextIteratorCollapseTrailingSpace;

	template <typename Strategy>
	static EphemeralRangeTemplate<Strategy> findPlainTextAlgorithm(const EphemeralRangeTemplate<Strategy>& inputRange, const String& target, FindOptions options)
	{
	// CharacterIterator requires layoutObjects to be up-to-date.
	if (!inputRange.startPosition().inDocument())
	return EphemeralRangeTemplate<Strategy>();
	ASSERT(inputRange.startPosition().document() == inputRange.endPosition().document());

	// FIXME: Reduce the code duplication with above (but how?).
	size_t matchStart;
	size_t matchLength;
	{
	TextIteratorBehaviorFlags behavior = iteratorFlagsForFindPlainText;
	if (options & FindAPICall)
	behavior \|= TextIteratorForWindowFind;
	CharacterIteratorAlgorithm<Strategy> findIterator(inputRange, behavior);
	matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
	if (!matchLength)
	return EphemeralRangeTemplate<Strategy>(options & Backwards ? inputRange.startPosition() : inputRange.endPosition());
	}

	CharacterIteratorAlgorithm<Strategy> computeRangeIterator(inputRange, iteratorFlagsForFindPlainText);
	return computeRangeIterator.calculateCharacterSubrange(matchStart, matchLength);
	}

	EphemeralRange findPlainText(const EphemeralRange& inputRange, const String& target, FindOptions options)
	{
	return findPlainTextAlgorithm<EditingStrategy>(inputRange, target, options);
	}

	EphemeralRangeInFlatTree findPlainText(const EphemeralRangeInFlatTree& inputRange, const String& target, FindOptions options)
	{
	return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target, options);
	}

	} // namespace blink