third_party/WebKit/Source/core/editing/iterators/SearchBuffer.cpp - chromium/src - Git at Google

 /*
  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All
  * rights reserved.
  * Copyright (C) 2005 Alexey Proskuryakov.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "core/editing/iterators/SearchBuffer.h"

 #include "core/dom/Document.h"
 #include "core/editing/iterators/CharacterIterator.h"
 #include "core/editing/iterators/SimplifiedBackwardsTextIterator.h"
 #include "platform/text/Character.h"
 #include "platform/text/TextBoundaries.h"
 #include "platform/text/TextBreakIteratorInternalICU.h"
 #include "platform/text/UnicodeUtilities.h"
 #include "wtf/text/CharacterNames.h"
 #include <unicode/usearch.h>

 namespace blink {

 static const size_t minimumSearchBufferSize = 8192;

 #if DCHECK_IS_ON()
 static bool searcherInUse;
 #endif

 static UStringSearch* createSearcher() {
   // Provide a non-empty pattern and non-empty text so usearch_open will not
   // fail, but it doesn't matter exactly what it is, since we don't perform any
   // searches without setting both the pattern and the text.
   UErrorCode status = U_ZERO_ERROR;
   String searchCollatorName =
       currentSearchLocaleID() + String("@collation=search");
   UStringSearch* searcher =
       usearch_open(&newlineCharacter, 1, &newlineCharacter, 1,
                    searchCollatorName.utf8().data(), 0, &status);
   DCHECK(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING ||
          status == U_USING_DEFAULT_WARNING)
       << status;
   return searcher;
 }

 static UStringSearch* searcher() {
   static UStringSearch* searcher = createSearcher();
   return searcher;
 }

 static inline void lockSearcher() {
 #if DCHECK_IS_ON()
   DCHECK(!searcherInUse);
   searcherInUse = true;
 #endif
 }

 static inline void unlockSearcher() {
 #if DCHECK_IS_ON()
   DCHECK(searcherInUse);
   searcherInUse = false;
 #endif
 }

 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
     : m_options(options),
       m_prefixLength(0),
       m_numberOfCharactersJustAppended(0),
       m_atBreak(true),
       m_needsMoreContext(options & AtWordStarts),
       m_targetRequiresKanaWorkaround(containsKanaLetters(target)) {
   DCHECK(!target.isEmpty()) << target;
   target.appendTo(m_target);

   // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
   // of doing it in a separate replacement pass here, but ICU doesn't offer a
   // way to add tailoring on top of the locale-specific tailoring as of this
   // writing.
   foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

   size_t targetLength = m_target.size();
   m_buffer.reserveInitialCapacity(
       std::max(targetLength * 8, minimumSearchBufferSize));
   m_overlap = m_buffer.capacity() / 4;

   if ((m_options & AtWordStarts) && targetLength) {
     UChar32 targetFirstCharacter;
     U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
     // Characters in the separator category never really occur at the beginning
     // of a word, so if the target begins with such a character, we just ignore
     // the AtWordStart option.
     if (isSeparator(targetFirstCharacter)) {
       m_options &= ~AtWordStarts;
       m_needsMoreContext = false;
     }
   }

   // Grab the single global searcher.
   // If we ever have a reason to do more than once search buffer at once, we'll
   // have to move to multiple searchers.
   lockSearcher();

   UStringSearch* searcher = blink::searcher();
   UCollator* collator = usearch_getCollator(searcher);

   UCollationStrength strength =
       m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
   if (ucol_getStrength(collator) != strength) {
     ucol_setStrength(collator, strength);
     usearch_reset(searcher);
   }

   UErrorCode status = U_ZERO_ERROR;
   usearch_setPattern(searcher, m_target.data(), targetLength, &status);
   DCHECK_EQ(status, U_ZERO_ERROR);

   // The kana workaround requires a normalized copy of the target string.
   if (m_targetRequiresKanaWorkaround)
     normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(),
                                    m_normalizedTarget);
 }

 inline SearchBuffer::~SearchBuffer() {
   // Leave the static object pointing to valid strings (pattern=targer,
   // text=buffer). Otheriwse, usearch_reset() will results in 'use-after-free'
   // error.
   UErrorCode status = U_ZERO_ERROR;
   usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
   usearch_setText(blink::searcher(), &newlineCharacter, 1, &status);
   DCHECK_EQ(status, U_ZERO_ERROR);

   unlockSearcher();
 }

 template <typename CharType>
 inline void SearchBuffer::append(const CharType* characters, size_t length) {
   DCHECK(length);

   if (m_atBreak) {
     m_buffer.shrink(0);
     m_prefixLength = 0;
     m_atBreak = false;
   } else if (m_buffer.size() == m_buffer.capacity()) {
     memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap,
            m_overlap * sizeof(UChar));
     m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
     m_buffer.shrink(m_overlap);
   }

   size_t oldLength = m_buffer.size();
   size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
   DCHECK(usableLength);
   m_buffer.resize(oldLength + usableLength);
   UChar* destination = m_buffer.data() + oldLength;
   StringImpl::copyChars(destination, characters, usableLength);
   foldQuoteMarksAndSoftHyphens(destination, usableLength);
   m_numberOfCharactersJustAppended = usableLength;
 }

 inline bool SearchBuffer::needsMoreContext() const {
   return m_needsMoreContext;
 }

 inline void SearchBuffer::prependContext(const UChar* characters,
                                          size_t length) {
   DCHECK(m_needsMoreContext);
   DCHECK_EQ(m_prefixLength, m_buffer.size());

   if (!length)
     return;

   m_atBreak = false;

   size_t wordBoundaryContextStart = length;
   if (wordBoundaryContextStart) {
     U16_BACK_1(characters, 0, wordBoundaryContextStart);
     wordBoundaryContextStart =
         startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
   }

   size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength,
                                  length - wordBoundaryContextStart);
   m_buffer.prepend(characters + length - usableLength, usableLength);
   m_prefixLength += usableLength;

   if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
     m_needsMoreContext = false;
 }

 inline bool SearchBuffer::atBreak() const {
   return m_atBreak;
 }

 inline void SearchBuffer::reachedBreak() {
   m_atBreak = true;
 }

 inline bool SearchBuffer::isBadMatch(const UChar* match,
                                      size_t matchLength) const {
   // This function implements the kana workaround. If usearch treats
   // it as a match, but we do not want to, then it's a "bad match".
   if (!m_targetRequiresKanaWorkaround)
     return false;

   // Normalize into a match buffer. We reuse a single buffer rather than
   // creating a new one each time.
   normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);

   return !checkOnlyKanaLettersInStrings(
       m_normalizedTarget.begin(), m_normalizedTarget.size(),
       m_normalizedMatch.begin(), m_normalizedMatch.size());
 }

 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const {
   DCHECK(m_options & AtWordStarts);

   if (!start)
     return true;

   int size = m_buffer.size();
   int offset = start;
   UChar32 firstCharacter;
   U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);

   if (m_options & TreatMedialCapitalAsWordStart) {
     UChar32 previousCharacter;
     U16_PREV(m_buffer.data(), 0, offset, previousCharacter);

     if (isSeparator(firstCharacter)) {
       // The start of a separator run is a word start (".org" in "webkit.org").
       if (!isSeparator(previousCharacter))
         return true;
     } else if (isASCIIUpper(firstCharacter)) {
       // The start of an uppercase run is a word start ("Kit" in "WebKit").
       if (!isASCIIUpper(previousCharacter))
         return true;
       // The last character of an uppercase run followed by a non-separator,
       // non-digit is a word start ("Request" in "XMLHTTPRequest").
       offset = start;
       U16_FWD_1(m_buffer.data(), offset, size);
       UChar32 nextCharacter = 0;
       if (offset < size)
         U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
       if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) &&
           !isSeparator(nextCharacter))
         return true;
     } else if (isASCIIDigit(firstCharacter)) {
       // The start of a digit run is a word start ("2" in "WebKit2").
       if (!isASCIIDigit(previousCharacter))
         return true;
     } else if (isSeparator(previousCharacter) ||
                isASCIIDigit(previousCharacter)) {
       // The start of a non-separator, non-uppercase, non-digit run is a word
       // start, except after an uppercase. ("org" in "webkit.org", but not "ore"
       // in "WebCore").
       return true;
     }
   }

   // Chinese and Japanese lack word boundary marks, and there is no clear
   // agreement on what constitutes a word, so treat the position before any CJK
   // character as a word start.
   if (Character::isCJKIdeographOrSymbol(firstCharacter))
     return true;

   size_t wordBreakSearchStart = start + length;
   while (wordBreakSearchStart > start)
     wordBreakSearchStart =
         findNextWordFromIndex(m_buffer.data(), m_buffer.size(),
                               wordBreakSearchStart, false /* backwards */);
   if (wordBreakSearchStart != start)
     return false;
   if (m_options & WholeWord)
     return static_cast<int>(start + length) ==
            findWordEndBoundary(m_buffer.data(), m_buffer.size(),
                                wordBreakSearchStart);
   return true;
 }

 inline size_t SearchBuffer::search(size_t& start) {
   size_t size = m_buffer.size();
   if (m_atBreak) {
     if (!size)
       return 0;
   } else {
     if (size != m_buffer.capacity())
       return 0;
   }

   UStringSearch* searcher = blink::searcher();

   UErrorCode status = U_ZERO_ERROR;
   usearch_setText(searcher, m_buffer.data(), size, &status);
   DCHECK_EQ(status, U_ZERO_ERROR);

   usearch_setOffset(searcher, m_prefixLength, &status);
   DCHECK_EQ(status, U_ZERO_ERROR);

   int matchStart = usearch_next(searcher, &status);
   DCHECK_EQ(status, U_ZERO_ERROR);

 nextMatch:
   if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
     DCHECK_EQ(matchStart, USEARCH_DONE);
     return 0;
   }

   // Matches that start in the overlap area are only tentative.
   // The same match may appear later, matching more characters,
   // possibly including a combining character that's not yet in the buffer.
   if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
     size_t overlap = m_overlap;
     if (m_options & AtWordStarts) {
       // Ensure that there is sufficient context before matchStart the next time
       // around for determining if it is at a word boundary.
       int wordBoundaryContextStart = matchStart;
       U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
       wordBoundaryContextStart = startOfLastWordBoundaryContext(
           m_buffer.data(), wordBoundaryContextStart);
       overlap = std::min(size - 1,
                          std::max(overlap, size - wordBoundaryContextStart));
     }
     memcpy(m_buffer.data(), m_buffer.data() + size - overlap,
            overlap * sizeof(UChar));
     m_prefixLength -= std::min(m_prefixLength, size - overlap);
     m_buffer.shrink(overlap);
     return 0;
   }

   size_t matchedLength = usearch_getMatchedLength(searcher);
   ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

   // If this match is "bad", move on to the next match.
   if (isBadMatch(m_buffer.data() + matchStart, matchedLength) ||
       ((m_options & AtWordStarts) &&
        !isWordStartMatch(matchStart, matchedLength))) {
     matchStart = usearch_next(searcher, &status);
     DCHECK_EQ(status, U_ZERO_ERROR);
     goto nextMatch;
   }

   size_t newSize = size - (matchStart + 1);
   memmove(m_buffer.data(), m_buffer.data() + matchStart + 1,
           newSize * sizeof(UChar));
   m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
   m_buffer.shrink(newSize);

   start = size - matchStart;
   return matchedLength;
 }

 // Check if there's any unpaird surrogate code point.
 // Non-character code points are not checked.
 static bool isValidUTF16(const String& s) {
   if (s.is8Bit())
     return true;
   const UChar* ustr = s.characters16();
   size_t length = s.length();
   size_t position = 0;
   while (position < length) {
     UChar32 character;
     U16_NEXT(ustr, position, length, character);
     if (U_IS_SURROGATE(character))
       return false;
   }
   return true;
 }

 template <typename Strategy>
 static size_t findPlainTextInternal(CharacterIteratorAlgorithm<Strategy>& it,
                                     const String& target,
                                     FindOptions options,
                                     size_t& matchStart) {
   matchStart = 0;
   size_t matchLength = 0;

   if (!isValidUTF16(target))
     return 0;

   SearchBuffer buffer(target, options);

   if (buffer.needsMoreContext()) {
     for (SimplifiedBackwardsTextIteratorAlgorithm<Strategy> backwardsIterator(
              PositionTemplate<Strategy>::firstPositionInNode(
                  it.ownerDocument()),
              PositionTemplate<Strategy>(it.currentContainer(),
                                         it.startOffset()));
          !backwardsIterator.atEnd(); backwardsIterator.advance()) {
       BackwardsTextBuffer characters;
       backwardsIterator.copyTextTo(&characters);
       buffer.prependContext(characters.data(), characters.size());
       if (!buffer.needsMoreContext())
         break;
     }
   }

   while (!it.atEnd()) {
     // TODO(xiaochengh): Should allow copying text to SearchBuffer directly
     ForwardsTextBuffer characters;
     it.copyTextTo(&characters);
     buffer.append(characters.data(), characters.size());
     it.advance(buffer.numberOfCharactersJustAppended());
   tryAgain:
     size_t matchStartOffset;
     if (size_t newMatchLength = buffer.search(matchStartOffset)) {
       // Note that we found a match, and where we found it.
       size_t lastCharacterInBufferOffset = it.characterOffset();
       DCHECK_GE(lastCharacterInBufferOffset, matchStartOffset);
       matchStart = lastCharacterInBufferOffset - matchStartOffset;
       matchLength = newMatchLength;
       // If searching forward, stop on the first match.
       // If searching backward, don't stop, so we end up with the last match.
       if (!(options & Backwards))
         break;
       goto tryAgain;
     }
     if (it.atBreak() && !buffer.atBreak()) {
       buffer.reachedBreak();
       goto tryAgain;
     }
   }

   return matchLength;
 }

 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText =
     TextIteratorEntersTextControls | TextIteratorEntersOpenShadowRoots |
     TextIteratorDoesNotBreakAtReplacedElement |
     TextIteratorCollapseTrailingSpace;

 template <typename Strategy>
 static EphemeralRangeTemplate<Strategy> findPlainTextAlgorithm(
     const EphemeralRangeTemplate<Strategy>& inputRange,
     const String& target,
     FindOptions options) {
   // CharacterIterator requires layoutObjects to be up to date.
   if (!inputRange.startPosition().isConnected())
     return EphemeralRangeTemplate<Strategy>();
   DCHECK_EQ(inputRange.startPosition().document(),
             inputRange.endPosition().document());

   // FIXME: Reduce the code duplication with above (but how?).
   size_t matchStart;
   size_t matchLength;
   {
     TextIteratorBehaviorFlags behavior = iteratorFlagsForFindPlainText;
     if (options & FindAPICall)
       behavior |= TextIteratorForWindowFind;
     CharacterIteratorAlgorithm<Strategy> findIterator(inputRange, behavior);
     matchLength =
         findPlainTextInternal(findIterator, target, options, matchStart);
     if (!matchLength)
       return EphemeralRangeTemplate<Strategy>(options & Backwards
                                                   ? inputRange.startPosition()
                                                   : inputRange.endPosition());
   }

   CharacterIteratorAlgorithm<Strategy> computeRangeIterator(
       inputRange, iteratorFlagsForFindPlainText);
   return computeRangeIterator.calculateCharacterSubrange(matchStart,
                                                          matchLength);
 }

 EphemeralRange findPlainText(const EphemeralRange& inputRange,
                              const String& target,
                              FindOptions options) {
   return findPlainTextAlgorithm<EditingStrategy>(inputRange, target, options);
 }

 EphemeralRangeInFlatTree findPlainText(
     const EphemeralRangeInFlatTree& inputRange,
     const String& target,
     FindOptions options) {
   return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target,
                                                            options);
 }

 }  // namespace blink
	/*
	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All
	* rights reserved.
	* Copyright (C) 2005 Alexey Proskuryakov.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "core/editing/iterators/SearchBuffer.h"

	#include "core/dom/Document.h"
	#include "core/editing/iterators/CharacterIterator.h"
	#include "core/editing/iterators/SimplifiedBackwardsTextIterator.h"
	#include "platform/text/Character.h"
	#include "platform/text/TextBoundaries.h"
	#include "platform/text/TextBreakIteratorInternalICU.h"
	#include "platform/text/UnicodeUtilities.h"
	#include "wtf/text/CharacterNames.h"
	#include <unicode/usearch.h>

	namespace blink {

	static const size_t minimumSearchBufferSize = 8192;

	#if DCHECK_IS_ON()
	static bool searcherInUse;
	#endif

	static UStringSearch* createSearcher() {
	// Provide a non-empty pattern and non-empty text so usearch_open will not
	// fail, but it doesn't matter exactly what it is, since we don't perform any
	// searches without setting both the pattern and the text.
	UErrorCode status = U_ZERO_ERROR;
	String searchCollatorName =
	currentSearchLocaleID() + String("@collation=search");
	UStringSearch* searcher =
	usearch_open(&newlineCharacter, 1, &newlineCharacter, 1,
	searchCollatorName.utf8().data(), 0, &status);
	DCHECK(status == U_ZERO_ERROR \|\| status == U_USING_FALLBACK_WARNING \|\|
	status == U_USING_DEFAULT_WARNING)
	<< status;
	return searcher;
	}

	static UStringSearch* searcher() {
	static UStringSearch* searcher = createSearcher();
	return searcher;
	}

	static inline void lockSearcher() {
	#if DCHECK_IS_ON()
	DCHECK(!searcherInUse);
	searcherInUse = true;
	#endif
	}

	static inline void unlockSearcher() {
	#if DCHECK_IS_ON()
	DCHECK(searcherInUse);
	searcherInUse = false;
	#endif
	}

	inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
	: m_options(options),
	m_prefixLength(0),
	m_numberOfCharactersJustAppended(0),
	m_atBreak(true),
	m_needsMoreContext(options & AtWordStarts),
	m_targetRequiresKanaWorkaround(containsKanaLetters(target)) {
	DCHECK(!target.isEmpty()) << target;
	target.appendTo(m_target);

	// FIXME: We'd like to tailor the searcher to fold quote marks for us instead
	// of doing it in a separate replacement pass here, but ICU doesn't offer a
	// way to add tailoring on top of the locale-specific tailoring as of this
	// writing.
	foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

	size_t targetLength = m_target.size();
	m_buffer.reserveInitialCapacity(
	std::max(targetLength * 8, minimumSearchBufferSize));
	m_overlap = m_buffer.capacity() / 4;

	if ((m_options & AtWordStarts) && targetLength) {
	UChar32 targetFirstCharacter;
	U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
	// Characters in the separator category never really occur at the beginning
	// of a word, so if the target begins with such a character, we just ignore
	// the AtWordStart option.
	if (isSeparator(targetFirstCharacter)) {
	m_options &= ~AtWordStarts;
	m_needsMoreContext = false;
	}
	}

	// Grab the single global searcher.
	// If we ever have a reason to do more than once search buffer at once, we'll
	// have to move to multiple searchers.
	lockSearcher();

	UStringSearch* searcher = blink::searcher();
	UCollator* collator = usearch_getCollator(searcher);

	UCollationStrength strength =
	m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
	if (ucol_getStrength(collator) != strength) {
	ucol_setStrength(collator, strength);
	usearch_reset(searcher);
	}

	UErrorCode status = U_ZERO_ERROR;
	usearch_setPattern(searcher, m_target.data(), targetLength, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);

	// The kana workaround requires a normalized copy of the target string.
	if (m_targetRequiresKanaWorkaround)
	normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(),
	m_normalizedTarget);
	}

	inline SearchBuffer::~SearchBuffer() {
	// Leave the static object pointing to valid strings (pattern=targer,
	// text=buffer). Otheriwse, usearch_reset() will results in 'use-after-free'
	// error.
	UErrorCode status = U_ZERO_ERROR;
	usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
	usearch_setText(blink::searcher(), &newlineCharacter, 1, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);

	unlockSearcher();
	}

	template <typename CharType>
	inline void SearchBuffer::append(const CharType* characters, size_t length) {
	DCHECK(length);

	if (m_atBreak) {
	m_buffer.shrink(0);
	m_prefixLength = 0;
	m_atBreak = false;
	} else if (m_buffer.size() == m_buffer.capacity()) {
	memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap,
	m_overlap * sizeof(UChar));
	m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
	m_buffer.shrink(m_overlap);
	}

	size_t oldLength = m_buffer.size();
	size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
	DCHECK(usableLength);
	m_buffer.resize(oldLength + usableLength);
	UChar* destination = m_buffer.data() + oldLength;
	StringImpl::copyChars(destination, characters, usableLength);
	foldQuoteMarksAndSoftHyphens(destination, usableLength);
	m_numberOfCharactersJustAppended = usableLength;
	}

	inline bool SearchBuffer::needsMoreContext() const {
	return m_needsMoreContext;
	}

	inline void SearchBuffer::prependContext(const UChar* characters,
	size_t length) {
	DCHECK(m_needsMoreContext);
	DCHECK_EQ(m_prefixLength, m_buffer.size());

	if (!length)
	return;

	m_atBreak = false;

	size_t wordBoundaryContextStart = length;
	if (wordBoundaryContextStart) {
	U16_BACK_1(characters, 0, wordBoundaryContextStart);
	wordBoundaryContextStart =
	startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
	}

	size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength,
	length - wordBoundaryContextStart);
	m_buffer.prepend(characters + length - usableLength, usableLength);
	m_prefixLength += usableLength;

	if (wordBoundaryContextStart \|\| m_prefixLength == m_buffer.capacity())
	m_needsMoreContext = false;
	}

	inline bool SearchBuffer::atBreak() const {
	return m_atBreak;
	}

	inline void SearchBuffer::reachedBreak() {
	m_atBreak = true;
	}

	inline bool SearchBuffer::isBadMatch(const UChar* match,
	size_t matchLength) const {
	// This function implements the kana workaround. If usearch treats
	// it as a match, but we do not want to, then it's a "bad match".
	if (!m_targetRequiresKanaWorkaround)
	return false;

	// Normalize into a match buffer. We reuse a single buffer rather than
	// creating a new one each time.
	normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);

	return !checkOnlyKanaLettersInStrings(
	m_normalizedTarget.begin(), m_normalizedTarget.size(),
	m_normalizedMatch.begin(), m_normalizedMatch.size());
	}

	inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const {
	DCHECK(m_options & AtWordStarts);

	if (!start)
	return true;

	int size = m_buffer.size();
	int offset = start;
	UChar32 firstCharacter;
	U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);

	if (m_options & TreatMedialCapitalAsWordStart) {
	UChar32 previousCharacter;
	U16_PREV(m_buffer.data(), 0, offset, previousCharacter);

	if (isSeparator(firstCharacter)) {
	// The start of a separator run is a word start (".org" in "webkit.org").
	if (!isSeparator(previousCharacter))
	return true;
	} else if (isASCIIUpper(firstCharacter)) {
	// The start of an uppercase run is a word start ("Kit" in "WebKit").
	if (!isASCIIUpper(previousCharacter))
	return true;
	// The last character of an uppercase run followed by a non-separator,
	// non-digit is a word start ("Request" in "XMLHTTPRequest").
	offset = start;
	U16_FWD_1(m_buffer.data(), offset, size);
	UChar32 nextCharacter = 0;
	if (offset < size)
	U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
	if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) &&
	!isSeparator(nextCharacter))
	return true;
	} else if (isASCIIDigit(firstCharacter)) {
	// The start of a digit run is a word start ("2" in "WebKit2").
	if (!isASCIIDigit(previousCharacter))
	return true;
	} else if (isSeparator(previousCharacter) \|\|
	isASCIIDigit(previousCharacter)) {
	// The start of a non-separator, non-uppercase, non-digit run is a word
	// start, except after an uppercase. ("org" in "webkit.org", but not "ore"
	// in "WebCore").
	return true;
	}
	}

	// Chinese and Japanese lack word boundary marks, and there is no clear
	// agreement on what constitutes a word, so treat the position before any CJK
	// character as a word start.
	if (Character::isCJKIdeographOrSymbol(firstCharacter))
	return true;

	size_t wordBreakSearchStart = start + length;
	while (wordBreakSearchStart > start)
	wordBreakSearchStart =
	findNextWordFromIndex(m_buffer.data(), m_buffer.size(),
	wordBreakSearchStart, false /* backwards */);
	if (wordBreakSearchStart != start)
	return false;
	if (m_options & WholeWord)
	return static_cast<int>(start + length) ==
	findWordEndBoundary(m_buffer.data(), m_buffer.size(),
	wordBreakSearchStart);
	return true;
	}

	inline size_t SearchBuffer::search(size_t& start) {
	size_t size = m_buffer.size();
	if (m_atBreak) {
	if (!size)
	return 0;
	} else {
	if (size != m_buffer.capacity())
	return 0;
	}

	UStringSearch* searcher = blink::searcher();

	UErrorCode status = U_ZERO_ERROR;
	usearch_setText(searcher, m_buffer.data(), size, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);

	usearch_setOffset(searcher, m_prefixLength, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);

	int matchStart = usearch_next(searcher, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);

	nextMatch:
	if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
	DCHECK_EQ(matchStart, USEARCH_DONE);
	return 0;
	}

	// Matches that start in the overlap area are only tentative.
	// The same match may appear later, matching more characters,
	// possibly including a combining character that's not yet in the buffer.
	if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
	size_t overlap = m_overlap;
	if (m_options & AtWordStarts) {
	// Ensure that there is sufficient context before matchStart the next time
	// around for determining if it is at a word boundary.
	int wordBoundaryContextStart = matchStart;
	U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
	wordBoundaryContextStart = startOfLastWordBoundaryContext(
	m_buffer.data(), wordBoundaryContextStart);
	overlap = std::min(size - 1,
	std::max(overlap, size - wordBoundaryContextStart));
	}
	memcpy(m_buffer.data(), m_buffer.data() + size - overlap,
	overlap * sizeof(UChar));
	m_prefixLength -= std::min(m_prefixLength, size - overlap);
	m_buffer.shrink(overlap);
	return 0;
	}

	size_t matchedLength = usearch_getMatchedLength(searcher);
	ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

	// If this match is "bad", move on to the next match.
	if (isBadMatch(m_buffer.data() + matchStart, matchedLength) \|\|
	((m_options & AtWordStarts) &&
	!isWordStartMatch(matchStart, matchedLength))) {
	matchStart = usearch_next(searcher, &status);
	DCHECK_EQ(status, U_ZERO_ERROR);
	goto nextMatch;
	}

	size_t newSize = size - (matchStart + 1);
	memmove(m_buffer.data(), m_buffer.data() + matchStart + 1,
	newSize * sizeof(UChar));
	m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
	m_buffer.shrink(newSize);

	start = size - matchStart;
	return matchedLength;
	}

	// Check if there's any unpaird surrogate code point.
	// Non-character code points are not checked.
	static bool isValidUTF16(const String& s) {
	if (s.is8Bit())
	return true;
	const UChar* ustr = s.characters16();
	size_t length = s.length();
	size_t position = 0;
	while (position < length) {
	UChar32 character;
	U16_NEXT(ustr, position, length, character);
	if (U_IS_SURROGATE(character))
	return false;
	}
	return true;
	}

	template <typename Strategy>
	static size_t findPlainTextInternal(CharacterIteratorAlgorithm<Strategy>& it,
	const String& target,
	FindOptions options,
	size_t& matchStart) {
	matchStart = 0;
	size_t matchLength = 0;

	if (!isValidUTF16(target))
	return 0;

	SearchBuffer buffer(target, options);

	if (buffer.needsMoreContext()) {
	for (SimplifiedBackwardsTextIteratorAlgorithm<Strategy> backwardsIterator(
	PositionTemplate<Strategy>::firstPositionInNode(
	it.ownerDocument()),
	PositionTemplate<Strategy>(it.currentContainer(),
	it.startOffset()));
	!backwardsIterator.atEnd(); backwardsIterator.advance()) {
	BackwardsTextBuffer characters;
	backwardsIterator.copyTextTo(&characters);
	buffer.prependContext(characters.data(), characters.size());
	if (!buffer.needsMoreContext())
	break;
	}
	}

	while (!it.atEnd()) {
	// TODO(xiaochengh): Should allow copying text to SearchBuffer directly
	ForwardsTextBuffer characters;
	it.copyTextTo(&characters);
	buffer.append(characters.data(), characters.size());
	it.advance(buffer.numberOfCharactersJustAppended());
	tryAgain:
	size_t matchStartOffset;
	if (size_t newMatchLength = buffer.search(matchStartOffset)) {
	// Note that we found a match, and where we found it.
	size_t lastCharacterInBufferOffset = it.characterOffset();
	DCHECK_GE(lastCharacterInBufferOffset, matchStartOffset);
	matchStart = lastCharacterInBufferOffset - matchStartOffset;
	matchLength = newMatchLength;
	// If searching forward, stop on the first match.
	// If searching backward, don't stop, so we end up with the last match.
	if (!(options & Backwards))
	break;
	goto tryAgain;
	}
	if (it.atBreak() && !buffer.atBreak()) {
	buffer.reachedBreak();
	goto tryAgain;
	}
	}

	return matchLength;
	}

	static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText =
	TextIteratorEntersTextControls \| TextIteratorEntersOpenShadowRoots \|
	TextIteratorDoesNotBreakAtReplacedElement \|
	TextIteratorCollapseTrailingSpace;

	template <typename Strategy>
	static EphemeralRangeTemplate<Strategy> findPlainTextAlgorithm(
	const EphemeralRangeTemplate<Strategy>& inputRange,
	const String& target,
	FindOptions options) {
	// CharacterIterator requires layoutObjects to be up to date.
	if (!inputRange.startPosition().isConnected())
	return EphemeralRangeTemplate<Strategy>();
	DCHECK_EQ(inputRange.startPosition().document(),
	inputRange.endPosition().document());

	// FIXME: Reduce the code duplication with above (but how?).
	size_t matchStart;
	size_t matchLength;
	{
	TextIteratorBehaviorFlags behavior = iteratorFlagsForFindPlainText;
	if (options & FindAPICall)
	behavior \|= TextIteratorForWindowFind;
	CharacterIteratorAlgorithm<Strategy> findIterator(inputRange, behavior);
	matchLength =
	findPlainTextInternal(findIterator, target, options, matchStart);
	if (!matchLength)
	return EphemeralRangeTemplate<Strategy>(options & Backwards
	? inputRange.startPosition()
	: inputRange.endPosition());
	}

	CharacterIteratorAlgorithm<Strategy> computeRangeIterator(
	inputRange, iteratorFlagsForFindPlainText);
	return computeRangeIterator.calculateCharacterSubrange(matchStart,
	matchLength);
	}

	EphemeralRange findPlainText(const EphemeralRange& inputRange,
	const String& target,
	FindOptions options) {
	return findPlainTextAlgorithm<EditingStrategy>(inputRange, target, options);
	}

	EphemeralRangeInFlatTree findPlainText(
	const EphemeralRangeInFlatTree& inputRange,
	const String& target,
	FindOptions options) {
	return findPlainTextAlgorithm<EditingInFlatTreeStrategy>(inputRange, target,
	options);
	}

	} // namespace blink