third_party/WebKit/Source/platform/text/TextBreakIteratorICU.cpp - chromium/src - Git at Google

 /*
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
  * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "platform/text/TextBreakIterator.h"

 #include "platform/text/TextBreakIteratorInternalICU.h"
 #include "wtf/Assertions.h"
 #include "wtf/HashMap.h"
 #include "wtf/PtrUtil.h"
 #include "wtf/ThreadSpecific.h"
 #include "wtf/ThreadingPrimitives.h"
 #include "wtf/text/WTFString.h"
 #include <memory>
 #include <unicode/rbbi.h>
 #include <unicode/ubrk.h>

 using namespace WTF;

 namespace blink {

 class LineBreakIteratorPool final {
   USING_FAST_MALLOC(LineBreakIteratorPool);
   WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool);

  public:
   static LineBreakIteratorPool& sharedPool() {
     static WTF::ThreadSpecific<LineBreakIteratorPool>* pool =
         new WTF::ThreadSpecific<LineBreakIteratorPool>;
     return **pool;
   }

   static std::unique_ptr<LineBreakIteratorPool> create() {
     return wrapUnique(new LineBreakIteratorPool);
   }

   icu::BreakIterator* take(const AtomicString& locale) {
     icu::BreakIterator* iterator = 0;
     for (size_t i = 0; i < m_pool.size(); ++i) {
       if (m_pool[i].first == locale) {
         iterator = m_pool[i].second;
         m_pool.remove(i);
         break;
       }
     }

     if (!iterator) {
       UErrorCode openStatus = U_ZERO_ERROR;
       bool localeIsEmpty = locale.isEmpty();
       iterator = icu::BreakIterator::createLineInstance(
           localeIsEmpty ? icu::Locale(currentTextBreakLocaleID())
                         : icu::Locale(locale.utf8().data()),
           openStatus);
       // locale comes from a web page and it can be invalid, leading ICU
       // to fail, in which case we fall back to the default locale.
       if (!localeIsEmpty && U_FAILURE(openStatus)) {
         openStatus = U_ZERO_ERROR;
         iterator = icu::BreakIterator::createLineInstance(
             icu::Locale(currentTextBreakLocaleID()), openStatus);
       }

       if (U_FAILURE(openStatus)) {
         DLOG(ERROR) << "icu::BreakIterator construction failed with status "
                     << openStatus;
         return 0;
       }
     }

     ASSERT(!m_vendedIterators.contains(iterator));
     m_vendedIterators.set(iterator, locale);
     return iterator;
   }

   void put(icu::BreakIterator* iterator) {
     DCHECK(m_vendedIterators.contains(iterator));

     if (m_pool.size() == capacity) {
       delete (m_pool[0].second);
       m_pool.remove(0);
     }

     m_pool.append(Entry(m_vendedIterators.take(iterator), iterator));
   }

  private:
   LineBreakIteratorPool() {}

   static const size_t capacity = 4;

   typedef std::pair<AtomicString, icu::BreakIterator*> Entry;
   typedef Vector<Entry, capacity> Pool;
   Pool m_pool;
   HashMap<icu::BreakIterator*, AtomicString> m_vendedIterators;

   friend WTF::ThreadSpecific<LineBreakIteratorPool>::
   operator LineBreakIteratorPool*();
 };

 // Identifies which piece of text the current UText chunk refers to:
 //  - NoContext:      no chunk has been set up yet (chunkContents is null);
 //  - PriorContext:   the chunk covers the prior-context string (UText::q,
 //                    length UText::b);
 //  - PrimaryContext: the chunk covers the primary string (UText::p, length
 //                    UText::a).
 enum TextContext { NoContext, PriorContext, PrimaryContext };

 // Number of UChars in the scratch buffer embedded in UTextWithBuffer.
 const int textBufferCapacity = 16;

 // A UText bundled with a small UChar buffer. Callers in this file install
 // |buffer| as the UText's pExtra storage; the Latin-1 provider copies widened
 // characters of the primary string into it one chunk at a time.
 typedef struct {
   DISALLOW_NEW();
   UText text;
   UChar buffer[textBufferCapacity];
 } UTextWithBuffer;

 // Pins |index| in place: negative values become 0, values above |limit|
 // become |limit|. The pinned value is also returned.
 static inline int64_t textPinIndex(int64_t& index, int64_t limit) {
   index = index < 0 ? 0 : (index > limit ? limit : index);
   return index;
 }

 // Total native length of the text: the primary string length (stored in
 // UText::a by textInit) plus the prior-context length (stored in UText::b).
 static inline int64_t textNativeLength(UText* text) {
   const int64_t primaryLength = text->a;
   const int64_t priorContextLength = text->b;
   return primaryLength + priorContextLength;
 }

 // Rebases |pointer| so that it refers to |destination| rather than |source|.
 // A pointer into the source's extra buffer is moved to the same offset in the
 // destination's extra buffer; a pointer into the source UText structure
 // itself is moved to the same offset in the destination structure. Pointers
 // to anything else are left untouched.
 static void textFixPointer(const UText* source,
                            UText* destination,
                            const void*& pointer) {
   const char* const address = static_cast<const char*>(pointer);

   const char* const extraBegin = static_cast<const char*>(source->pExtra);
   const char* const extraEnd = extraBegin + source->extraSize;
   if (address >= extraBegin && address < extraEnd) {
     // Pointer references the source extra buffer.
     pointer = static_cast<char*>(destination->pExtra) + (address - extraBegin);
     return;
   }

   const char* const structBegin = reinterpret_cast<const char*>(source);
   const char* const structEnd = structBegin + source->sizeOfStruct;
   if (address >= structBegin && address < structEnd) {
     // Pointer references the source structure, but not its extra buffer.
     pointer = reinterpret_cast<char*>(destination) + (address - structBegin);
   }
 }

 // UText "clone" callback shared by the Latin-1 and UTF-16 providers.
 //
 // Only shallow clones are supported (|deep| must be false). The destination
 // is (re)initialised with utext_setup() using the source's extra-buffer size,
 // the source structure and extra buffer are copied over, and every pointer
 // that referred into the source structure or its extra buffer is rebased
 // onto the destination.
 //
 // Returns null if |*status| already indicates failure on entry; returns
 // whatever utext_setup() produced if setup fails; otherwise returns the
 // initialised destination.
 static UText* textClone(UText* destination,
                         const UText* source,
                         UBool deep,
                         UErrorCode* status) {
   ASSERT_UNUSED(deep, !deep);
   if (U_FAILURE(*status))
     return 0;
   int32_t extraSize = source->extraSize;
   destination = utext_setup(destination, extraSize, status);
   if (U_FAILURE(*status))
     return destination;
   // The structure copy below overwrites every field, so remember the two
   // values that belong to the destination and restore them afterwards.
   void* extraNew = destination->pExtra;
   int32_t flags = destination->flags;
   int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct);
   memcpy(destination, source, sizeToCopy);
   destination->pExtra = extraNew;
   destination->flags = flags;
   // Texts opened without an extra buffer have extraSize == 0 and may have
   // null pExtra pointers; memcpy must not be called with null pointers even
   // for a zero length, so skip the copy in that case.
   if (extraSize > 0)
     memcpy(destination->pExtra, source->pExtra, extraSize);
   textFixPointer(source, destination, destination->context);
   textFixPointer(source, destination, destination->p);
   textFixPointer(source, destination, destination->q);
   ASSERT(!destination->r);
   // chunkContents is typed as const UChar*, so route it through a
   // const void* temporary to reuse textFixPointer.
   const void* chunkContents =
       static_cast<const void*>(destination->chunkContents);
   textFixPointer(source, destination, chunkContents);
   destination->chunkContents = static_cast<const UChar*>(chunkContents);
   return destination;
 }

 // UText "extract" callback. The providers in this file are only handed to
 // ICU functions that never extract text, so reaching this function is a
 // programming error; it reports U_UNSUPPORTED_ERROR and extracts nothing.
 static int32_t textExtract(UText* /* text */,
                            int64_t /* nativeStart */,
                            int64_t /* nativeLimit */,
                            UChar* /* destination */,
                            int32_t /* destinationCapacity */,
                            UErrorCode* errorCode) {
   ASSERT_NOT_REACHED();
   *errorCode = U_UNSUPPORTED_ERROR;
   return 0;
 }

 // UText "close" callback: drops the reference to the primary string. The
 // access callbacks in this file treat a null context as "not accessible".
 static void textClose(UText* text) {
   text->context = nullptr;
 }

 // Decides which context |nativeIndex| falls into. Indices below the prior
 // context length (UText::b) belong to the prior context; indices above it,
 // or any index when there is no prior context, belong to the primary
 // context. An index exactly at the boundary belongs to the primary context
 // when moving forward and to the prior context when moving backward.
 static inline TextContext textGetContext(const UText* text,
                                          int64_t nativeIndex,
                                          UBool forward) {
   const int64_t priorLength = text->b;
   if (priorLength && nativeIndex <= priorLength) {
     const bool crossingIntoPrimary = nativeIndex == priorLength && forward;
     return crossingIntoPrimary ? PrimaryContext : PriorContext;
   }
   return PrimaryContext;
 }

 // Reports which context the current chunk of a Latin-1 text refers to. For
 // Latin-1 texts the primary context is served from the extra buffer, so a
 // chunk pointing at pExtra is primary; any other non-null chunk is prior.
 static inline TextContext textLatin1GetCurrentContext(const UText* text) {
   const UChar* const chunk = text->chunkContents;
   if (chunk) {
     if (chunk == text->pExtra)
       return PrimaryContext;
     return PriorContext;
   }
   return NoContext;
 }

 // Repositions the chunk of a Latin-1 text inside the primary context so that
 // it covers |nativeIndex|, then refills the extra buffer with the matching
 // Latin-1 characters widened to UChar.
 //
 // Moving forward, the chunk starts at |nativeIndex| and extends for at most
 // one buffer's worth of characters (clamped to |nativeLength|). Moving
 // backward, the chunk ends at |nativeIndex| and reaches back at most one
 // buffer's worth (clamped to the start of the primary context, UText::b).
 // Requires that chunkContents already points at the extra buffer.
 static void textLatin1MoveInPrimaryContext(UText* text,
                                            int64_t nativeIndex,
                                            int64_t nativeLength,
                                            UBool forward) {
   ASSERT(text->chunkContents == text->pExtra);
   if (forward) {
     ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
     text->chunkNativeStart = nativeIndex;
     // extraSize is in bytes; dividing by sizeof(UChar) gives the buffer
     // capacity in characters.
     text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar);
     if (text->chunkNativeLimit > nativeLength)
       text->chunkNativeLimit = nativeLength;
   } else {
     ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
     text->chunkNativeLimit = nativeIndex;
     text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar);
     if (text->chunkNativeStart < text->b)
       text->chunkNativeStart = text->b;
   }
   int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
   // Ensure chunk length is well defined if computed length exceeds int32_t
   // range.
   ASSERT(length <= std::numeric_limits<int32_t>::max());
   text->chunkLength = length <= std::numeric_limits<int32_t>::max()
                           ? static_cast<int32_t>(length)
                           : 0;
   text->nativeIndexingLimit = text->chunkLength;
   // Forward iteration starts at the beginning of the chunk, backward
   // iteration at its end.
   text->chunkOffset = forward ? 0 : text->chunkLength;
   // Native indices include the prior context, so subtract its length
   // (UText::b) to index into the primary Latin-1 string (UText::p).
   StringImpl::copyChars(
       const_cast<UChar*>(text->chunkContents),
       static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b),
       static_cast<unsigned>(text->chunkLength));
 }

 // Points the chunk of a Latin-1 text at the extra buffer (the primary
 // context) and positions it around |nativeIndex|. Expects the chunk to be
 // either unset or currently on the prior context.
 static void textLatin1SwitchToPrimaryContext(UText* text,
                                              int64_t nativeIndex,
                                              int64_t nativeLength,
                                              UBool forward) {
   ASSERT(text->chunkContents == text->q || !text->chunkContents);
   const UChar* const scratchBuffer = static_cast<const UChar*>(text->pExtra);
   text->chunkContents = scratchBuffer;
   textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
 }

 // Positions the chunk of a Latin-1 text inside the prior context. The whole
 // prior context [0, UText::b) is exposed as one chunk; only the offset
 // within it depends on |nativeIndex|. Requires that chunkContents already
 // points at the prior-context string (UText::q).
 static void textLatin1MoveInPriorContext(UText* text,
                                          int64_t nativeIndex,
                                          int64_t nativeLength,
                                          UBool forward) {
   ASSERT(text->chunkContents == text->q);
   ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
   // The same range check is stated twice so that both |nativeLength| and
   // |forward| count as used when assertions are compiled out.
   ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength
                                       : nativeIndex <= nativeLength);
   ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength
                                  : nativeIndex <= nativeLength);

   text->chunkNativeStart = 0;
   text->chunkNativeLimit = text->b;
   text->chunkLength = text->b;
   text->nativeIndexingLimit = text->chunkLength;

   const int64_t offset = nativeIndex - text->chunkNativeStart;
   // Keep the chunk offset well defined even if the computed offset exceeds
   // the int32_t range or the chunk length.
   ASSERT(offset <= std::numeric_limits<int32_t>::max());
   int32_t narrowedOffset = 0;
   if (offset <= std::numeric_limits<int32_t>::max())
     narrowedOffset = static_cast<int32_t>(offset);
   text->chunkOffset = std::min(narrowedOffset, text->chunkLength);
 }

 // Points the chunk of a Latin-1 text at the prior-context string (UText::q)
 // and positions it around |nativeIndex|. Expects the chunk to be either
 // unset or currently on the extra buffer (the primary context).
 static void textLatin1SwitchToPriorContext(UText* text,
                                            int64_t nativeIndex,
                                            int64_t nativeLength,
                                            UBool forward) {
   ASSERT(text->chunkContents == text->pExtra || !text->chunkContents);
   const UChar* const priorString = static_cast<const UChar*>(text->q);
   text->chunkContents = priorString;
   textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
 }

 // Fast path shared by the access callbacks. Handles the two cases that need
 // no chunk change:
 //  - |nativeIndex| lies inside the current chunk: the chunk offset is moved
 //    to it and |isAccessible| is set to TRUE;
 //  - |nativeIndex| lies beyond the end of the text (forward) or at/before
 //    its start (backward) while the chunk already touches that end: the
 //    chunk offset is moved to that end and |isAccessible| is set to FALSE.
 // Returns true when one of these cases applied; returns false, leaving
 // |isAccessible| untouched, when the caller has to set up a new chunk.
 static inline bool textInChunkOrOutOfRange(UText* text,
                                            int64_t nativeIndex,
                                            int64_t nativeLength,
                                            UBool forward,
                                            UBool& isAccessible) {
   // Forward access needs a character at the index, backward access needs
   // one before it, hence the asymmetric bounds.
   const bool insideChunk =
       forward ? (nativeIndex >= text->chunkNativeStart &&
                  nativeIndex < text->chunkNativeLimit)
               : (nativeIndex > text->chunkNativeStart &&
                  nativeIndex <= text->chunkNativeLimit);
   if (insideChunk) {
     const int64_t offset = nativeIndex - text->chunkNativeStart;
     // Keep the chunk offset well formed even if the computed offset exceeds
     // the int32_t range.
     ASSERT(offset <= std::numeric_limits<int32_t>::max());
     if (offset <= std::numeric_limits<int32_t>::max())
       text->chunkOffset = static_cast<int32_t>(offset);
     else
       text->chunkOffset = 0;
     isAccessible = TRUE;
     return true;
   }

   const bool pastTextEnd =
       forward ? (nativeIndex >= nativeLength &&
                  text->chunkNativeLimit == nativeLength)
               : (nativeIndex <= 0 && !text->chunkNativeStart);
   if (!pastTextEnd)
     return false;

   if (forward)
     text->chunkOffset = text->chunkLength;
   else
     text->chunkOffset = 0;
   isAccessible = FALSE;
   return true;
 }

 // UText "access" callback for Latin-1 texts. Makes the chunk cover
 // |nativeIndex| for iteration in the given direction, moving within the
 // current context or switching between the prior and primary contexts as
 // needed. Returns FALSE for a closed text (null context) or when the index
 // is out of range; TRUE otherwise.
 static UBool textLatin1Access(UText* text, int64_t nativeIndex, UBool forward) {
   if (!text->context)
     return FALSE;

   const int64_t totalLength = textNativeLength(text);
   UBool accessible;
   if (textInChunkOrOutOfRange(text, nativeIndex, totalLength, forward,
                               accessible))
     return accessible;

   // Pins |nativeIndex| in place (it is passed by reference).
   textPinIndex(nativeIndex, totalLength - 1);

   const TextContext activeContext = textLatin1GetCurrentContext(text);
   const TextContext targetContext = textGetContext(text, nativeIndex, forward);
   ASSERT(targetContext != NoContext);

   const bool stayInContext = targetContext == activeContext;
   if (targetContext == PrimaryContext) {
     if (stayInContext)
       textLatin1MoveInPrimaryContext(text, nativeIndex, totalLength, forward);
     else
       textLatin1SwitchToPrimaryContext(text, nativeIndex, totalLength, forward);
   } else if (stayInContext) {
     textLatin1MoveInPriorContext(text, nativeIndex, totalLength, forward);
   } else {
     ASSERT(targetContext == PriorContext);
     textLatin1SwitchToPriorContext(text, nativeIndex, totalLength, forward);
   }
   return TRUE;
 }

 // Function table for the Latin-1 text provider. Only the callbacks
 // implemented in this file are installed (textClone, textNativeLength,
 // textLatin1Access, textExtract and textClose); every other slot is left
 // null.
 static const struct UTextFuncs textLatin1Funcs = {
     sizeof(UTextFuncs), 0,           0, 0, textClone, textNativeLength,
     textLatin1Access,   textExtract, 0, 0, 0,         0,
     textClose,          0,           0, 0,
 };

 // Fills in the provider-specific fields of an already set-up UText:
 //  - p / a: primary string and its length (context aliases the string);
 //  - q / b: prior-context string and its length;
 //  - pFuncs: the provider's function table.
 static void textInit(UText* text,
                      const UTextFuncs* funcs,
                      const void* string,
                      unsigned length,
                      const UChar* priorContext,
                      int priorContextLength) {
   // Primary string.
   text->context = string;
   text->p = string;
   text->a = length;

   // Prior context.
   text->q = priorContext;
   text->b = priorContextLength;

   // Provider description.
   text->pFuncs = funcs;
   text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
 }

 // Opens a UText over a Latin-1 string with optional UTF-16 prior context,
 // using the buffer embedded in |utWithBuffer| as extra storage. Returns null
 // if |*status| already indicates failure, if |string| is null or |length|
 // does not fit in int32_t (status becomes U_ILLEGAL_ARGUMENT_ERROR), or if
 // utext_setup() fails.
 static UText* textOpenLatin1(UTextWithBuffer* utWithBuffer,
                              const LChar* string,
                              unsigned length,
                              const UChar* priorContext,
                              int priorContextLength,
                              UErrorCode* status) {
   if (U_FAILURE(*status))
     return nullptr;

   const unsigned maxLength =
       static_cast<unsigned>(std::numeric_limits<int32_t>::max());
   if (!string || length > maxLength) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return nullptr;
   }

   UText* opened =
       utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
   if (U_FAILURE(*status)) {
     ASSERT(!opened);
     return nullptr;
   }

   textInit(opened, &textLatin1Funcs, string, length, priorContext,
            priorContextLength);
   return opened;
 }

 // Reports which context the current chunk of a UTF-16 text refers to. For
 // UTF-16 texts the primary context is served straight from the primary
 // string, so a chunk pointing at UText::p is primary; any other non-null
 // chunk is prior.
 static inline TextContext textUTF16GetCurrentContext(const UText* text) {
   const UChar* const chunk = text->chunkContents;
   if (chunk) {
     if (chunk == text->p)
       return PrimaryContext;
     return PriorContext;
   }
   return NoContext;
 }

 // Positions the chunk of a UTF-16 text inside the primary context. The
 // whole primary string [UText::b, nativeLength) is exposed as one chunk;
 // only the offset within it depends on |nativeIndex|. Requires that
 // chunkContents already points at the primary string (UText::p).
 static void textUTF16MoveInPrimaryContext(UText* text,
                                           int64_t nativeIndex,
                                           int64_t nativeLength,
                                           UBool forward) {
   ASSERT(text->chunkContents == text->p);
   ASSERT_UNUSED(forward,
                 forward ? nativeIndex >= text->b : nativeIndex > text->b);
   ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength
                                  : nativeIndex <= nativeLength);

   text->chunkNativeStart = text->b;
   text->chunkNativeLimit = nativeLength;

   const int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
   // Keep the chunk length well defined even if the computed length exceeds
   // the int32_t range.
   ASSERT(length <= std::numeric_limits<int32_t>::max());
   if (length <= std::numeric_limits<int32_t>::max())
     text->chunkLength = static_cast<int32_t>(length);
   else
     text->chunkLength = 0;
   text->nativeIndexingLimit = text->chunkLength;

   const int64_t offset = nativeIndex - text->chunkNativeStart;
   // Keep the chunk offset well defined even if the computed offset exceeds
   // the int32_t range or the chunk length.
   ASSERT(offset <= std::numeric_limits<int32_t>::max());
   int32_t narrowedOffset = 0;
   if (offset <= std::numeric_limits<int32_t>::max())
     narrowedOffset = static_cast<int32_t>(offset);
   text->chunkOffset = std::min(narrowedOffset, text->chunkLength);
 }

 // Points the chunk of a UTF-16 text at the primary string (UText::p) and
 // positions it around |nativeIndex|. Expects the chunk to be either unset
 // or currently on the prior context.
 static void textUTF16SwitchToPrimaryContext(UText* text,
                                             int64_t nativeIndex,
                                             int64_t nativeLength,
                                             UBool forward) {
   ASSERT(text->chunkContents == text->q || !text->chunkContents);
   const UChar* const primaryString = static_cast<const UChar*>(text->p);
   text->chunkContents = primaryString;
   textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
 }

 // Positions the chunk of a UTF-16 text inside the prior context. The whole
 // prior context [0, UText::b) is exposed as one chunk; only the offset
 // within it depends on |nativeIndex|. Requires that chunkContents already
 // points at the prior-context string (UText::q).
 static void textUTF16MoveInPriorContext(UText* text,
                                         int64_t nativeIndex,
                                         int64_t nativeLength,
                                         UBool forward) {
   ASSERT(text->chunkContents == text->q);
   ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
   // The same range check is stated twice so that both |nativeLength| and
   // |forward| count as used when assertions are compiled out.
   ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength
                                       : nativeIndex <= nativeLength);
   ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength
                                  : nativeIndex <= nativeLength);

   text->chunkNativeStart = 0;
   text->chunkNativeLimit = text->b;
   text->chunkLength = text->b;
   text->nativeIndexingLimit = text->chunkLength;

   const int64_t offset = nativeIndex - text->chunkNativeStart;
   // Keep the chunk offset well defined even if the computed offset exceeds
   // the int32_t range or the chunk length.
   ASSERT(offset <= std::numeric_limits<int32_t>::max());
   int32_t narrowedOffset = 0;
   if (offset <= std::numeric_limits<int32_t>::max())
     narrowedOffset = static_cast<int32_t>(offset);
   text->chunkOffset = std::min(narrowedOffset, text->chunkLength);
 }

 // Points the chunk of a UTF-16 text at the prior-context string (UText::q)
 // and positions it around |nativeIndex|. Expects the chunk to be either
 // unset or currently on the primary string.
 static void textUTF16SwitchToPriorContext(UText* text,
                                           int64_t nativeIndex,
                                           int64_t nativeLength,
                                           UBool forward) {
   ASSERT(text->chunkContents == text->p || !text->chunkContents);
   const UChar* const priorString = static_cast<const UChar*>(text->q);
   text->chunkContents = priorString;
   textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
 }

 // UText "access" callback for UTF-16 texts. Makes the chunk cover
 // |nativeIndex| for iteration in the given direction, moving within the
 // current context or switching between the prior and primary contexts as
 // needed. Returns FALSE for a closed text (null context) or when the index
 // is out of range; TRUE otherwise.
 static UBool textUTF16Access(UText* text, int64_t nativeIndex, UBool forward) {
   if (!text->context)
     return FALSE;

   const int64_t totalLength = textNativeLength(text);
   UBool accessible;
   if (textInChunkOrOutOfRange(text, nativeIndex, totalLength, forward,
                               accessible))
     return accessible;

   // Pins |nativeIndex| in place (it is passed by reference).
   textPinIndex(nativeIndex, totalLength - 1);

   const TextContext activeContext = textUTF16GetCurrentContext(text);
   const TextContext targetContext = textGetContext(text, nativeIndex, forward);
   ASSERT(targetContext != NoContext);

   const bool stayInContext = targetContext == activeContext;
   if (targetContext == PrimaryContext) {
     if (stayInContext)
       textUTF16MoveInPrimaryContext(text, nativeIndex, totalLength, forward);
     else
       textUTF16SwitchToPrimaryContext(text, nativeIndex, totalLength, forward);
   } else if (stayInContext) {
     textUTF16MoveInPriorContext(text, nativeIndex, totalLength, forward);
   } else {
     ASSERT(targetContext == PriorContext);
     textUTF16SwitchToPriorContext(text, nativeIndex, totalLength, forward);
   }
   return TRUE;
 }

 // Function table for the UTF-16 text provider. Only the callbacks
 // implemented in this file are installed (textClone, textNativeLength,
 // textUTF16Access, textExtract and textClose); every other slot is left
 // null.
 static const struct UTextFuncs textUTF16Funcs = {
     sizeof(UTextFuncs), 0,           0, 0, textClone, textNativeLength,
     textUTF16Access,    textExtract, 0, 0, 0,         0,
     textClose,          0,           0, 0,
 };

 // Opens a UText over a UTF-16 string with optional UTF-16 prior context. No
 // extra buffer is requested because chunks alias the strings directly.
 // Returns null if |*status| already indicates failure, if |string| is null
 // or |length| does not fit in int32_t (status becomes
 // U_ILLEGAL_ARGUMENT_ERROR), or if utext_setup() fails.
 static UText* textOpenUTF16(UText* text,
                             const UChar* string,
                             unsigned length,
                             const UChar* priorContext,
                             int priorContextLength,
                             UErrorCode* status) {
   if (U_FAILURE(*status))
     return nullptr;

   const unsigned maxLength =
       static_cast<unsigned>(std::numeric_limits<int32_t>::max());
   if (!string || length > maxLength) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return nullptr;
   }

   UText* opened = utext_setup(text, 0, status);
   if (U_FAILURE(*status)) {
     ASSERT(!opened);
     return nullptr;
   }

   textInit(opened, &textUTF16Funcs, string, length, priorContext,
            priorContextLength);
   return opened;
 }

 // Pristine UText value; copied into stack-allocated UTextWithBuffer::text
 // members to reset them before they are opened.
 static UText emptyText = UTEXT_INITIALIZER;

 // Returns the shared word-break iterator for Latin-1 input, positioned on
 // |string| (|length| characters). The iterator is created once for the
 // current text-break locale and reused by every later call, so the returned
 // pointer is only valid until the next call.
 //
 // Returns null when the iterator cannot be created, when the text cannot be
 // opened, or when the iterator rejects the text. In the last case the
 // iterator may still be bound to the text of an earlier call, so it is
 // deliberately not handed out.
 static TextBreakIterator* wordBreakIterator(const LChar* string, int length) {
   UErrorCode errorCode = U_ZERO_ERROR;
   static TextBreakIterator* breakIter = 0;
   if (!breakIter) {
     breakIter = icu::BreakIterator::createWordInstance(
         icu::Locale(currentTextBreakLocaleID()), errorCode);
     DCHECK(U_SUCCESS(errorCode))
         << "ICU could not open a break iterator: " << u_errorName(errorCode)
         << " (" << errorCode << ")";
     if (!breakIter)
       return 0;
   }

   // Stack-allocated UText whose extra storage is the embedded buffer.
   UTextWithBuffer textLocal;
   textLocal.text = emptyText;
   textLocal.text.extraSize = sizeof(textLocal.buffer);
   textLocal.text.pExtra = textLocal.buffer;

   UErrorCode openStatus = U_ZERO_ERROR;
   UText* text = textOpenLatin1(&textLocal, string, length, 0, 0, &openStatus);
   if (U_FAILURE(openStatus)) {
     DLOG(ERROR) << "textOpenLatin1 failed with status " << openStatus;
     return 0;
   }

   UErrorCode setTextStatus = U_ZERO_ERROR;
   breakIter->setText(text, setTextStatus);
   // The local UText is no longer needed whether or not setText succeeded.
   utext_close(text);
   if (U_FAILURE(setTextStatus)) {
     DLOG(ERROR) << "BreakIterator::setText failed with status "
                 << setTextStatus;
     return 0;
   }

   return breakIter;
 }

 // Binds |iter| to the UTF-16 buffer |string| (|length| code units) through a
 // temporary stack-allocated UText. Failures are silent: if the UText cannot
 // be opened the iterator is left untouched.
 //
 // The temporary UText is closed once it has been handed to the iterator,
 // matching the open/close pairing used by the other setText() call sites in
 // this file.
 static void setText16(TextBreakIterator* iter,
                       const UChar* string,
                       int length) {
   UErrorCode errorCode = U_ZERO_ERROR;
   UText uText = UTEXT_INITIALIZER;
   utext_openUChars(&uText, string, length, &errorCode);
   if (U_FAILURE(errorCode))
     return;
   iter->setText(&uText, errorCode);
   utext_close(&uText);
 }

 // Returns the shared word-break iterator for UTF-16 input, bound to
 // |string| (|length| code units). The iterator is created on first use for
 // the current text-break locale and reused by every later call. Returns
 // null when ICU cannot create the iterator.
 TextBreakIterator* wordBreakIterator(const UChar* string, int length) {
   static TextBreakIterator* sharedIterator = nullptr;
   if (!sharedIterator) {
     UErrorCode status = U_ZERO_ERROR;
     sharedIterator = icu::BreakIterator::createWordInstance(
         icu::Locale(currentTextBreakLocaleID()), status);
     DCHECK(U_SUCCESS(status))
         << "ICU could not open a break iterator: " << u_errorName(status)
         << " (" << status << ")";
     if (!sharedIterator)
       return nullptr;
   }

   setText16(sharedIterator, string, length);
   return sharedIterator;
 }

 // Returns a word-break iterator over |length| characters of |string|
 // starting at |start|, dispatching on the string's character width.
 // Returns null for an empty string.
 TextBreakIterator* wordBreakIterator(const String& string,
                                      int start,
                                      int length) {
   if (string.isEmpty())
     return nullptr;
   return string.is8Bit()
              ? wordBreakIterator(string.characters8() + start, length)
              : wordBreakIterator(string.characters16() + start, length);
 }

 // Takes a line-break iterator for |locale| from the pool and binds it to
 // the Latin-1 |string| (|length| characters), with |priorContext|
 // (|priorContextLength| UTF-16 code units) made available as text preceding
 // the string. The caller gives the iterator back through
 // releaseLineBreakIterator().
 //
 // Returns null when no iterator can be obtained, when the text cannot be
 // opened, or when the iterator rejects the text. On those failure paths the
 // iterator is handed back to the pool first, so it is not left stranded in
 // the pool's set of vended iterators.
 TextBreakIterator* acquireLineBreakIterator(const LChar* string,
                                             int length,
                                             const AtomicString& locale,
                                             const UChar* priorContext,
                                             unsigned priorContextLength) {
   TextBreakIterator* iterator =
       LineBreakIteratorPool::sharedPool().take(locale);
   if (!iterator)
     return 0;

   // Stack-allocated UText whose extra storage is the embedded buffer.
   UTextWithBuffer textLocal;
   textLocal.text = emptyText;
   textLocal.text.extraSize = sizeof(textLocal.buffer);
   textLocal.text.pExtra = textLocal.buffer;

   UErrorCode openStatus = U_ZERO_ERROR;
   UText* text = textOpenLatin1(&textLocal, string, length, priorContext,
                                priorContextLength, &openStatus);
   if (U_FAILURE(openStatus)) {
     DLOG(ERROR) << "textOpenLatin1 failed with status " << openStatus;
     LineBreakIteratorPool::sharedPool().put(iterator);
     return 0;
   }

   UErrorCode setTextStatus = U_ZERO_ERROR;
   iterator->setText(text, setTextStatus);
   // The local UText is no longer needed whether or not setText succeeded.
   utext_close(text);
   if (U_FAILURE(setTextStatus)) {
     DLOG(ERROR) << "ubrk_setUText failed with status " << setTextStatus;
     LineBreakIteratorPool::sharedPool().put(iterator);
     return 0;
   }

   return iterator;
 }

 // Takes a line-break iterator for |locale| from the pool and binds it to
 // the UTF-16 |string| (|length| code units), with |priorContext|
 // (|priorContextLength| UTF-16 code units) made available as text preceding
 // the string. The caller gives the iterator back through
 // releaseLineBreakIterator().
 //
 // Returns null when no iterator can be obtained, when the text cannot be
 // opened, or when the iterator rejects the text. On those failure paths the
 // iterator is handed back to the pool first, so it is not left stranded in
 // the pool's set of vended iterators.
 TextBreakIterator* acquireLineBreakIterator(const UChar* string,
                                             int length,
                                             const AtomicString& locale,
                                             const UChar* priorContext,
                                             unsigned priorContextLength) {
   TextBreakIterator* iterator =
       LineBreakIteratorPool::sharedPool().take(locale);
   if (!iterator)
     return 0;

   UText textLocal = UTEXT_INITIALIZER;

   UErrorCode openStatus = U_ZERO_ERROR;
   UText* text = textOpenUTF16(&textLocal, string, length, priorContext,
                               priorContextLength, &openStatus);
   if (U_FAILURE(openStatus)) {
     DLOG(ERROR) << "textOpenUTF16 failed with status " << openStatus;
     LineBreakIteratorPool::sharedPool().put(iterator);
     return 0;
   }

   UErrorCode setTextStatus = U_ZERO_ERROR;
   iterator->setText(text, setTextStatus);
   // The local UText is no longer needed whether or not setText succeeded.
   utext_close(text);
   if (U_FAILURE(setTextStatus)) {
     DLOG(ERROR) << "ubrk_setUText failed with status " << setTextStatus;
     LineBreakIteratorPool::sharedPool().put(iterator);
     return 0;
   }

   return iterator;
 }

 // Hands a line-break iterator obtained from acquireLineBreakIterator() back
 // to the shared pool. |iterator| must not be null.
 void releaseLineBreakIterator(TextBreakIterator* iterator) {
   DCHECK(iterator);
   LineBreakIteratorPool& pool = LineBreakIteratorPool::sharedPool();
   pool.put(iterator);
 }

 // Single-slot cache holding one idle character-break iterator (or null).
 static TextBreakIterator* nonSharedCharacterBreakIterator;

 // Under a mutex, replaces the cached iterator with |newValue| if and only if
 // the slot currently holds |expected|. Returns whether the swap happened.
 static inline bool compareAndSwapNonSharedCharacterBreakIterator(
     TextBreakIterator* expected,
     TextBreakIterator* newValue) {
   DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
   MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
   const bool slotMatches = nonSharedCharacterBreakIterator == expected;
   if (slotMatches)
     nonSharedCharacterBreakIterator = newValue;
   return slotMatches;
 }

 // Builds a character-break iterator over |string|. 8-bit strings (and empty
 // ones) are handled without ICU: only the character pointer, offset and
 // length are recorded. 16-bit strings get an ICU iterator.
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
     const String& string)
     : m_is8Bit(true),
       m_charaters8(nullptr),
       m_offset(0),
       m_length(0),
       m_iterator(nullptr) {
   if (!string.isEmpty()) {
     m_is8Bit = string.is8Bit();
     if (!m_is8Bit) {
       createIteratorForBuffer(string.characters16(), string.length());
     } else {
       m_charaters8 = string.characters8();
       m_offset = 0;
       m_length = string.length();
     }
   }
 }

 // Builds a character-break iterator over a raw UTF-16 buffer; this always
 // goes through an ICU iterator.
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
     const UChar* buffer,
     unsigned length)
     : m_is8Bit(false),
       m_charaters8(nullptr),
       m_offset(0),
       m_length(0),
       m_iterator(nullptr) {
   createIteratorForBuffer(buffer, length);
 }

 void NonSharedCharacterBreakIterator::createIteratorForBuffer(
     const UChar* buffer,
     unsigned length) {
   m_iterator = nonSharedCharacterBreakIterator;
   bool createdIterator =
       m_iterator &&
       compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
   if (!createdIterator) {
     UErrorCode errorCode = U_ZERO_ERROR;
     m_iterator = icu::BreakIterator::createCharacterInstance(
         icu::Locale(currentTextBreakLocaleID()), errorCode);
     DCHECK(U_SUCCESS(errorCode))
         << "ICU could not open a break iterator: " << u_errorName(errorCode)
         << " (" << errorCode << ")";
   }

   setText16(m_iterator, buffer, length);
 }

 // 8-bit instances own no ICU iterator. Otherwise the iterator is parked in
 // the cache slot for reuse when the slot is empty, and destroyed when the
 // slot is already occupied.
 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator() {
   if (!m_is8Bit) {
     const bool parkedInCache =
         compareAndSwapNonSharedCharacterBreakIterator(nullptr, m_iterator);
     if (!parkedInCache)
       delete m_iterator;
   }
 }

 // Advances to the next character boundary and returns its offset, or
 // TextBreakDone when an 8-bit iterator is already at the end of the text.
 int NonSharedCharacterBreakIterator::next() {
   if (m_is8Bit) {
     if (m_offset >= m_length)
       return TextBreakDone;
     m_offset += clusterLengthStartingAt(m_offset);
     return m_offset;
   }
   return m_iterator->next();
 }

 // Returns the offset of the current boundary.
 int NonSharedCharacterBreakIterator::current() {
   if (m_is8Bit)
     return m_offset;
   return m_iterator->current();
 }

 // Reports whether |offset| is a character boundary. In 8-bit text the only
 // non-boundary is the position between a CR and the LF that follows it.
 bool NonSharedCharacterBreakIterator::isBreak(int offset) const {
   if (m_is8Bit)
     return !isLFAfterCR(offset);
   return m_iterator->isBoundary(offset);
 }

 // Returns the boundary preceding |offset|. For 8-bit text this is one
 // character back, or two when |offset| sits on the LF of a CR LF pair;
 // TextBreakDone is returned when |offset| is at or before the start.
 int NonSharedCharacterBreakIterator::preceding(int offset) const {
   if (m_is8Bit) {
     if (offset <= 0)
       return TextBreakDone;
     const int stepBack = isLFAfterCR(offset) ? 2 : 1;
     return offset - stepBack;
   }
   return m_iterator->preceding(offset);
 }

 // Returns the boundary following |offset|. For 8-bit text this advances by
 // the length of the cluster starting at |offset|; TextBreakDone is returned
 // when |offset| is at or past the end of the text.
 int NonSharedCharacterBreakIterator::following(int offset) const {
   if (m_is8Bit) {
     if (static_cast<unsigned>(offset) >= m_length)
       return TextBreakDone;
     return offset + clusterLengthStartingAt(offset);
   }
   return m_iterator->following(offset);
 }

 // Returns the shared sentence-break iterator, bound to the UTF-16 |string|
 // (|length| code units). The iterator is created on first use for the
 // current text-break locale and reused by every later call. Returns null
 // when ICU cannot create the iterator.
 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) {
   static TextBreakIterator* sharedIterator = nullptr;
   if (!sharedIterator) {
     UErrorCode status = U_ZERO_ERROR;
     sharedIterator = icu::BreakIterator::createSentenceInstance(
         icu::Locale(currentTextBreakLocaleID()), status);
     DCHECK(U_SUCCESS(status))
         << "ICU could not open a break iterator: " << u_errorName(status)
         << " (" << status << ")";
     if (!sharedIterator)
       return nullptr;
   }

   setText16(sharedIterator, string, length);
   return sharedIterator;
 }

 // Reports whether the rule status of |iterator|'s current boundary is
 // anything other than UBRK_WORD_NONE. The iterator is treated as an
 // icu::RuleBasedBreakIterator.
 bool isWordTextBreak(TextBreakIterator* iterator) {
   return static_cast<icu::RuleBasedBreakIterator*>(iterator)
              ->getRuleStatus() != UBRK_WORD_NONE;
 }

 // Returns a rule-based break iterator compiled from |breakRules| and bound
 // to the UTF-16 |string| (|length| code units). Returns null when |string|
 // is null or when the rules cannot be compiled.
 //
 // The compiled iterator is cached in a function-local static, so the rules
 // of the first successful call are reused by every later call regardless of
 // the |breakRules| passed then. An iterator whose construction failed is
 // destroyed instead of cached, so a later call can retry.
 static TextBreakIterator* setUpIteratorWithRules(const char* breakRules,
                                                  const UChar* string,
                                                  int length) {
   if (!string)
     return 0;

   static TextBreakIterator* iterator = 0;
   if (!iterator) {
     UParseError parseStatus;
     UErrorCode openStatus = U_ZERO_ERROR;
     Vector<UChar> rules;
     String(breakRules).appendTo(rules);

     TextBreakIterator* created = new icu::RuleBasedBreakIterator(
         icu::UnicodeString(rules.data(), rules.size()), parseStatus,
         openStatus);
     DCHECK(U_SUCCESS(openStatus))
         << "ICU could not open a break iterator: " << u_errorName(openStatus)
         << " (" << openStatus << ")";
     // The constructor reports failure through |openStatus| rather than by
     // returning null, so the status decides whether the object is usable.
     if (!created || U_FAILURE(openStatus)) {
       delete created;
       return 0;
     }
     iterator = created;
   }

   setText16(iterator, string, length);
   return iterator;
 }

 // Returns a break iterator over |string| (|length| UTF-16 code units) that
 // uses the custom rule set below instead of ICU's stock character-break
 // rules. The iterator itself is obtained from setUpIteratorWithRules(), which
 // also returns null when |string| is null.
 TextBreakIterator* cursorMovementIterator(const UChar* string, int length) {
   // This rule set is based on character-break iterator rules of ICU 4.0
   // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
   // The major differences from the original ones are listed below:
   // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with
   //   '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
   // * Removed rules that prevent a cursor from moving after prepend characters
   //   (Bug 24342);
   // * Added rules that prevent a cursor from moving after virama signs of Indic
   //   languages except Tamil (Bug 15790), and;
   // * Added rules that prevent a cursor from moving before Japanese half-width
   //   katakana voiced marks.
   // * Added rules for regional indicator symbols.
   //
   // Layout of the rule string: variable definitions first, then "!!chain",
   // the "!!forward" rules, the mirrored "!!reverse" rules, and finally empty
   // "!!safe_reverse" / "!!safe_forward" sections. For each Indic script,
   // $Xxx0 is the range used before the virama ($XxxV) and $Xxx1 the range
   // used after it in the forward rules (swapped in the reverse rules).
   static const char* const kRules =
       "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
       "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
       "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
       "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced
                                          // marks
       "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 "
       "\\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
       "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
       "$L       = [\\p{Grapheme_Cluster_Break = L}];"
       "$V       = [\\p{Grapheme_Cluster_Break = V}];"
       "$T       = [\\p{Grapheme_Cluster_Break = T}];"
       "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
       "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
       "$Hin0    = [\\u0905-\\u0939];"          // Devanagari Letter A,...,Ha
       "$HinV    = \\u094D;"                    // Devanagari Sign Virama
       "$Hin1    = [\\u0915-\\u0939];"          // Devanagari Letter Ka,...,Ha
       "$Ben0    = [\\u0985-\\u09B9];"          // Bengali Letter A,...,Ha
       "$BenV    = \\u09CD;"                    // Bengali Sign Virama
       "$Ben1    = [\\u0995-\\u09B9];"          // Bengali Letter Ka,...,Ha
       "$Pan0    = [\\u0A05-\\u0A39];"          // Gurmukhi Letter A,...,Ha
       "$PanV    = \\u0A4D;"                    // Gurmukhi Sign Virama
       "$Pan1    = [\\u0A15-\\u0A39];"          // Gurmukhi Letter Ka,...,Ha
       "$Guj0    = [\\u0A85-\\u0AB9];"          // Gujarati Letter A,...,Ha
       "$GujV    = \\u0ACD;"                    // Gujarati Sign Virama
       "$Guj1    = [\\u0A95-\\u0AB9];"          // Gujarati Letter Ka,...,Ha
       "$Ori0    = [\\u0B05-\\u0B39];"          // Oriya Letter A,...,Ha
       "$OriV    = \\u0B4D;"                    // Oriya Sign Virama
       "$Ori1    = [\\u0B15-\\u0B39];"          // Oriya Letter Ka,...,Ha
       "$Tel0    = [\\u0C05-\\u0C39];"          // Telugu Letter A,...,Ha
       "$TelV    = \\u0C4D;"                    // Telugu Sign Virama
       // NOTE(review): this range starts at \u0C14, whereas every other
       // script's "1" class starts at xx15 and the comment says "Ka". Possibly
       // a typo for \u0C15 -- confirm before changing, since it alters the
       // rules.
       "$Tel1    = [\\u0C14-\\u0C39];"          // Telugu Letter Ka,...,Ha
       "$Kan0    = [\\u0C85-\\u0CB9];"          // Kannada Letter A,...,Ha
       "$KanV    = \\u0CCD;"                    // Kannada Sign Virama
       "$Kan1    = [\\u0C95-\\u0CB9];"          // Kannada Letter Ka,...,Ha
       "$Mal0    = [\\u0D05-\\u0D39];"          // Malayalam Letter A,...,Ha
       "$MalV    = \\u0D4D;"                    // Malayalam Sign Virama
       "$Mal1    = [\\u0D15-\\u0D39];"          // Malayalam Letter A,...,Ha
       "$RI      = [\\U0001F1E6-\\U0001F1FF];"  // Emoji regional indicators
       "!!chain;"
       "!!forward;"
       "$CR $LF;"
       "$L ($L | $V | $LV | $LVT);"
       "($LV | $V) ($V | $T);"
       "($LVT | $T) $T;"
       "[^$Control $CR $LF] $Extend;"
       "[^$Control $CR $LF] $SpacingMark;"
       "$RI $RI / $RI;"
       "$RI $RI;"
       "$Hin0 $HinV $Hin1;"  // Devanagari Virama (forward)
       "$Ben0 $BenV $Ben1;"  // Bengali Virama (forward)
       "$Pan0 $PanV $Pan1;"  // Gurmukhi Virama (forward)
       "$Guj0 $GujV $Guj1;"  // Gujarati Virama (forward)
       "$Ori0 $OriV $Ori1;"  // Oriya Virama (forward)
       "$Tel0 $TelV $Tel1;"  // Telugu Virama (forward)
       "$Kan0 $KanV $Kan1;"  // Kannada Virama (forward)
       "$Mal0 $MalV $Mal1;"  // Malayalam Virama (forward)
       "!!reverse;"
       "$LF $CR;"
       "($L | $V | $LV | $LVT) $L;"
       "($V | $T) ($LV | $V);"
       "$T ($LVT | $T);"
       "$Extend      [^$Control $CR $LF];"
       "$SpacingMark [^$Control $CR $LF];"
       "$RI $RI / $RI $RI;"
       "$RI $RI;"
       "$Hin1 $HinV $Hin0;"  // Devanagari Virama (backward)
       "$Ben1 $BenV $Ben0;"  // Bengali Virama (backward)
       "$Pan1 $PanV $Pan0;"  // Gurmukhi Virama (backward)
       "$Guj1 $GujV $Guj0;"  // Gujarati Virama (backward)
       "$Ori1 $OriV $Ori0;"  // Oriya Virama (backward)
       "$Tel1 $TelV $Tel0;"  // Telugu Virama (backward)
       "$Kan1 $KanV $Kan0;"  // Kannada Virama (backward)
       "$Mal1 $MalV $Mal0;"  // Malayalam Virama (backward)
       "!!safe_reverse;"
       "!!safe_forward;";

   return setUpIteratorWithRules(kRules, string, length);
 }

 }  // namespace blink