third_party/blink/renderer/platform/text/text_break_iterator_icu.cc - chromium/src - Git at Google

 /*
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
  * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #include "third_party/blink/renderer/platform/text/text_break_iterator.h"

 #include <unicode/rbbi.h>
 #include <unicode/ubrk.h>
 #include <algorithm>
 #include <limits>
 #include <memory>
 #include <utility>

 #include "base/macros.h"
 #include "base/memory/ptr_util.h"
 #include "third_party/blink/renderer/platform/text/icu_error.h"
 #include "third_party/blink/renderer/platform/text/text_break_iterator_internal_icu.h"
 #include "third_party/blink/renderer/platform/wtf/assertions.h"
 #include "third_party/blink/renderer/platform/wtf/hash_map.h"
 #include "third_party/blink/renderer/platform/wtf/text/atomic_string_hash.h"
 #include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
 #include "third_party/blink/renderer/platform/wtf/thread_specific.h"
 #include "third_party/blink/renderer/platform/wtf/threading_primitives.h"

 namespace blink {

 class LineBreakIteratorPool final {
   USING_FAST_MALLOC(LineBreakIteratorPool);

  public:
   static LineBreakIteratorPool& SharedPool() {
     static WTF::ThreadSpecific<LineBreakIteratorPool>* pool =
         new WTF::ThreadSpecific<LineBreakIteratorPool>;
     return **pool;
   }

   static std::unique_ptr<LineBreakIteratorPool> Create() {
     return base::WrapUnique(new LineBreakIteratorPool);
   }

   icu::BreakIterator* Take(const AtomicString& locale) {
     icu::BreakIterator* iterator = nullptr;
     for (size_t i = 0; i < pool_.size(); ++i) {
       if (pool_[i].first == locale) {
         iterator = pool_[i].second;
         pool_.EraseAt(i);
         break;
       }
     }

     if (!iterator) {
       UErrorCode open_status = U_ZERO_ERROR;
       bool locale_is_empty = locale.IsEmpty();
       iterator = icu::BreakIterator::createLineInstance(
           locale_is_empty ? icu::Locale(CurrentTextBreakLocaleID())
                           : icu::Locale(locale.Utf8().data()),
           open_status);
       // locale comes from a web page and it can be invalid, leading ICU
       // to fail, in which case we fall back to the default locale.
       if (!locale_is_empty && U_FAILURE(open_status)) {
         open_status = U_ZERO_ERROR;
         iterator = icu::BreakIterator::createLineInstance(
             icu::Locale(CurrentTextBreakLocaleID()), open_status);
       }

       if (U_FAILURE(open_status)) {
         DLOG(ERROR) << "icu::BreakIterator construction failed with status "
                     << open_status;
         return nullptr;
       }
     }

     DCHECK(!vended_iterators_.Contains(iterator));
     vended_iterators_.Set(iterator, locale);
     return iterator;
   }

   void Put(icu::BreakIterator* iterator) {
     DCHECK(vended_iterators_.Contains(iterator));

     if (pool_.size() == kCapacity) {
       delete (pool_[0].second);
       pool_.EraseAt(0);
     }

     pool_.push_back(Entry(vended_iterators_.Take(iterator), iterator));
   }

  private:
   LineBreakIteratorPool() = default;

   static const size_t kCapacity = 4;

   typedef std::pair<AtomicString, icu::BreakIterator*> Entry;
   typedef Vector<Entry, kCapacity> Pool;
   Pool pool_;
   HashMap<icu::BreakIterator*, AtomicString> vended_iterators_;

   friend WTF::ThreadSpecific<LineBreakIteratorPool>::
   operator LineBreakIteratorPool*();

   DISALLOW_COPY_AND_ASSIGN(LineBreakIteratorPool);
 };

 // Identifies which backing text the current UText chunk refers to:
 // kNoContext when no chunk has been loaded yet, kPriorContext for the prior
 // context string, and kPrimaryContext for the main string.
 enum TextContext { kNoContext, kPriorContext, kPrimaryContext };

 // Number of UChars in the inline buffer of UTextWithBuffer.
 const int kTextBufferCapacity = 16;

 // A UText bundled with a small inline UChar buffer. The callers in this file
 // point the UText's pExtra at |buffer| before opening a Latin-1 text.
 typedef struct {
   DISALLOW_NEW();
   UText text;
   UChar buffer[kTextBufferCapacity];
 } UTextWithBuffer;

 // Clamps |index| in place: negative values become 0, otherwise values above
 // |limit| become |limit|. Returns the updated value of |index|.
 static inline int64_t TextPinIndex(int64_t& index, int64_t limit) {
   index = index < 0 ? 0 : (index > limit ? limit : index);
   return index;
 }

 // Total native length of |text|: the primary string length (stored in a) plus
 // the prior context length (stored in b), as set up by TextInit().
 static inline int64_t TextNativeLength(UText* text) {
   const int64_t primary_length = text->a;
   const int64_t prior_context_length = text->b;
   return primary_length + prior_context_length;
 }

 // Rebases |pointer| after |source| was copied into |destination|. A pointer
 // into the source's extra buffer is moved to the same offset in the
 // destination's extra buffer; a pointer into the source UText structure itself
 // is moved to the same offset in the destination structure. Any other pointer
 // is left untouched.
 static void TextFixPointer(const UText* source,
                            UText* destination,
                            const void*& pointer) {
   const char* const target = static_cast<const char*>(pointer);
   const char* const extra_begin = static_cast<const char*>(source->pExtra);
   const char* const struct_begin = reinterpret_cast<const char*>(source);

   if (target >= extra_begin && target < extra_begin + source->extraSize) {
     // Pointer references source extra buffer.
     pointer =
         static_cast<char*>(destination->pExtra) + (target - extra_begin);
     return;
   }

   if (target >= struct_begin &&
       target < struct_begin + source->sizeOfStruct) {
     // Pointer references source text structure, but not source extra buffer.
     pointer = reinterpret_cast<char*>(destination) + (target - struct_begin);
   }
 }

 // Clone callback shared by both UTextFuncs tables in this file. Produces a
 // shallow copy of |source| in |destination| (deep clones are not supported,
 // see the DCHECK). Returns nullptr if |status| already indicates failure,
 // otherwise the value returned by utext_setup().
 static UText* TextClone(UText* destination,
                         const UText* source,
                         UBool deep,
                         UErrorCode* status) {
   DCHECK(!deep);
   if (U_FAILURE(*status))
     return nullptr;
   int32_t extra_size = source->extraSize;
   // Prepare |destination| with an extra buffer as large as the source's.
   destination = utext_setup(destination, extra_size, status);
   if (U_FAILURE(*status))
     return destination;
   // Remember the destination's own extra buffer and flags: the struct copy
   // below overwrites them with the source's values, so they are restored
   // right after it.
   void* extra_new = destination->pExtra;
   int32_t flags = destination->flags;
   int size_to_copy = std::min(source->sizeOfStruct, destination->sizeOfStruct);
   memcpy(destination, source, size_to_copy);
   destination->pExtra = extra_new;
   destination->flags = flags;
   memcpy(destination->pExtra, source->pExtra, extra_size);
   // Rebase any pointers that referred into the source struct or its extra
   // buffer so that they refer into the destination instead.
   TextFixPointer(source, destination, destination->context);
   TextFixPointer(source, destination, destination->p);
   TextFixPointer(source, destination, destination->q);
   DCHECK(!destination->r);
   // chunkContents is a const UChar*, so it is rebased through a const void*
   // temporary to match TextFixPointer's reference parameter.
   const void* chunk_contents =
       static_cast<const void*>(destination->chunkContents);
   TextFixPointer(source, destination, chunk_contents);
   destination->chunkContents = static_cast<const UChar*>(chunk_contents);
   return destination;
 }

 // Extract callback installed in both UTextFuncs tables. It is not expected to
 // be called: it hits NOTREACHED(), reports U_UNSUPPORTED_ERROR through
 // |error_code| and returns 0.
 static int32_t TextExtract(UText* /* text */,
                            int64_t /* native_start */,
                            int64_t /* native_limit */,
                            UChar* /* destination */,
                            int32_t /* destination_capacity */,
                            UErrorCode* error_code) {
   // This text provider is only used with ICU functions that never perform an
   // extract operation.
   NOTREACHED();
   *error_code = U_UNSUPPORTED_ERROR;
   return 0;
 }

 // Close callback installed in both UTextFuncs tables. Clears the context
 // pointer; the access callbacks in this file return FALSE when the context is
 // null.
 static void TextClose(UText* text) {
   text->context = nullptr;
 }

 // Decides which context (prior or primary) holds |native_index|. With no prior
 // context everything is primary. Indices below the prior context length are
 // prior, indices above it are primary, and the boundary index itself belongs
 // to the primary context when moving forward and to the prior context when
 // moving backward.
 static inline TextContext TextGetContext(const UText* text,
                                          int64_t native_index,
                                          UBool forward) {
   const int64_t prior_context_length = text->b;
   if (prior_context_length == 0)
     return kPrimaryContext;
   if (native_index > prior_context_length)
     return kPrimaryContext;
   if (native_index < prior_context_length)
     return kPriorContext;
   // Exactly at the boundary between the two contexts.
   if (forward)
     return kPrimaryContext;
   return kPriorContext;
 }

 // Reports which context the current chunk of a Latin-1 text belongs to. No
 // chunk means kNoContext; a chunk located in the extra buffer is the primary
 // context; anything else is the prior context.
 static inline TextContext TextLatin1GetCurrentContext(const UText* text) {
   const UChar* const chunk = text->chunkContents;
   if (!chunk)
     return kNoContext;
   if (chunk == text->pExtra)
     return kPrimaryContext;
   return kPriorContext;
 }

 // Repositions the chunk of a Latin-1 text within the primary context so that
 // it starts (forward) or ends (backward) at |native_index|, then refills the
 // extra buffer with the corresponding Latin-1 characters widened to UChar.
 // The chunk must already point at the extra buffer.
 static void TextLatin1MoveInPrimaryContext(UText* text,
                                            int64_t native_index,
                                            int64_t native_length,
                                            UBool forward) {
   DCHECK_EQ(text->chunkContents, text->pExtra);
   if (forward) {
     DCHECK_GE(native_index, text->b);
     DCHECK_LT(native_index, native_length);
     // Window begins at |native_index| and spans at most the buffer capacity
     // (extraSize bytes / sizeof(UChar)), clipped to the end of the text.
     text->chunkNativeStart = native_index;
     text->chunkNativeLimit = native_index + text->extraSize / sizeof(UChar);
     if (text->chunkNativeLimit > native_length)
       text->chunkNativeLimit = native_length;
   } else {
     DCHECK_GT(native_index, text->b);
     DCHECK_LE(native_index, native_length);
     // Window ends at |native_index| and spans at most the buffer capacity,
     // clipped so that it does not reach back into the prior context.
     text->chunkNativeLimit = native_index;
     text->chunkNativeStart = native_index - text->extraSize / sizeof(UChar);
     if (text->chunkNativeStart < text->b)
       text->chunkNativeStart = text->b;
   }
   int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
   // Ensure chunk length is well defined if computed length exceeds int32_t
   // range.
   DCHECK_LE(length, std::numeric_limits<int32_t>::max());
   text->chunkLength = length <= std::numeric_limits<int32_t>::max()
                           ? static_cast<int32_t>(length)
                           : 0;
   text->nativeIndexingLimit = text->chunkLength;
   // Moving forward starts at the beginning of the chunk; moving backward
   // starts at its end.
   text->chunkOffset = forward ? 0 : text->chunkLength;
   // Copy the window out of the 8-bit primary string (text->p). The source
   // index is relative to the primary string, hence the subtraction of the
   // prior context length (text->b).
   StringImpl::CopyChars(
       const_cast<UChar*>(text->chunkContents),
       static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b),
       static_cast<unsigned>(text->chunkLength));
 }

 // Makes the extra buffer the current chunk of a Latin-1 text (coming from no
 // chunk or from the prior context) and positions it at |native_index|.
 static void TextLatin1SwitchToPrimaryContext(UText* text,
                                              int64_t native_index,
                                              int64_t native_length,
                                              UBool forward) {
   DCHECK(!text->chunkContents || text->chunkContents == text->q);
   const UChar* const primary_chunk = static_cast<const UChar*>(text->pExtra);
   text->chunkContents = primary_chunk;
   TextLatin1MoveInPrimaryContext(text, native_index, native_length, forward);
 }

 // Positions the chunk of a Latin-1 text inside the prior context. The chunk
 // always covers the whole prior context (native range [0, text->b)), so only
 // the chunk offset depends on |native_index|. The chunk must already point at
 // the prior context buffer (text->q).
 static void TextLatin1MoveInPriorContext(UText* text,
                                          int64_t native_index,
                                          int64_t native_length,
                                          UBool forward) {
   DCHECK_EQ(text->chunkContents, text->q);
   DCHECK(forward ? native_index < text->b : native_index <= text->b);
   DCHECK(forward ? native_index < native_length
                  : native_index <= native_length);
   text->chunkNativeStart = 0;
   text->chunkNativeLimit = text->b;
   text->chunkLength = text->b;
   text->nativeIndexingLimit = text->chunkLength;
   int64_t offset = native_index - text->chunkNativeStart;
   // Ensure chunk offset is well defined if computed offset exceeds int32_t
   // range or chunk length.
   DCHECK_LE(offset, std::numeric_limits<int32_t>::max());
   text->chunkOffset = std::min(offset <= std::numeric_limits<int32_t>::max()
                                    ? static_cast<int32_t>(offset)
                                    : 0,
                                text->chunkLength);
 }

 // Makes the prior context buffer the current chunk of a Latin-1 text (coming
 // from no chunk or from the extra buffer) and positions it at |native_index|.
 static void TextLatin1SwitchToPriorContext(UText* text,
                                            int64_t native_index,
                                            int64_t native_length,
                                            UBool forward) {
   DCHECK(!text->chunkContents || text->chunkContents == text->pExtra);
   const UChar* const prior_chunk = static_cast<const UChar*>(text->q);
   text->chunkContents = prior_chunk;
   TextLatin1MoveInPriorContext(text, native_index, native_length, forward);
 }

 // Fast path shared by the access callbacks. Returns true when the request can
 // be answered without loading a different chunk, storing the answer in
 // |is_accessible|:
 //  - |native_index| lies in the current chunk: the chunk offset is updated and
 //    |is_accessible| is TRUE;
 //  - |native_index| is past the end (forward) or before the start (backward)
 //    and the chunk already touches that end of the text: the chunk offset is
 //    moved to that end and |is_accessible| is FALSE.
 // Returns false otherwise, leaving |is_accessible| untouched.
 static inline bool TextInChunkOrOutOfRange(UText* text,
                                            int64_t native_index,
                                            int64_t native_length,
                                            UBool forward,
                                            UBool& is_accessible) {
   const int64_t chunk_start = text->chunkNativeStart;
   const int64_t chunk_limit = text->chunkNativeLimit;

   // Forward access uses the half-open range [start, limit); backward access
   // uses (start, limit].
   const bool inside_chunk =
       forward ? (native_index >= chunk_start && native_index < chunk_limit)
               : (native_index > chunk_start && native_index <= chunk_limit);
   if (inside_chunk) {
     const int64_t offset = native_index - chunk_start;
     // Ensure chunk offset is well formed if computed offset exceeds int32_t
     // range.
     DCHECK_LE(offset, std::numeric_limits<int32_t>::max());
     int32_t pinned_offset = 0;
     if (offset <= std::numeric_limits<int32_t>::max())
       pinned_offset = static_cast<int32_t>(offset);
     text->chunkOffset = pinned_offset;
     is_accessible = TRUE;
     return true;
   }

   if (forward) {
     if (native_index >= native_length && chunk_limit == native_length) {
       text->chunkOffset = text->chunkLength;
       is_accessible = FALSE;
       return true;
     }
     return false;
   }

   if (native_index <= 0 && chunk_start == 0) {
     text->chunkOffset = 0;
     is_accessible = FALSE;
     return true;
   }
   return false;
 }

 // Access callback for Latin-1 texts. Makes |native_index| addressable through
 // the current chunk, loading or switching chunks as needed. Returns FALSE for
 // a closed text (null context) or when the index is out of range and the
 // chunk already sits at that end of the text; TRUE otherwise.
 static UBool TextLatin1Access(UText* text,
                               int64_t native_index,
                               UBool forward) {
   if (!text->context)
     return FALSE;

   const int64_t native_length = TextNativeLength(text);
   UBool is_accessible;
   if (TextInChunkOrOutOfRange(text, native_index, native_length, forward,
                               is_accessible)) {
     return is_accessible;
   }

   // TextPinIndex clamps |native_index| in place.
   TextPinIndex(native_index, native_length - 1);
   const TextContext current_context = TextLatin1GetCurrentContext(text);
   const TextContext new_context = TextGetContext(text, native_index, forward);
   DCHECK_NE(new_context, kNoContext);

   if (new_context != current_context) {
     if (new_context == kPrimaryContext) {
       TextLatin1SwitchToPrimaryContext(text, native_index, native_length,
                                        forward);
     } else {
       DCHECK_EQ(new_context, kPriorContext);
       TextLatin1SwitchToPriorContext(text, native_index, native_length,
                                      forward);
     }
   } else if (current_context == kPrimaryContext) {
     TextLatin1MoveInPrimaryContext(text, native_index, native_length, forward);
   } else {
     TextLatin1MoveInPriorContext(text, native_index, native_length, forward);
   }
   return TRUE;
 }

 // Callback table for the Latin-1 text provider. Slot names in the trailing
 // comments follow ICU's UTextFuncs layout (see unicode/utext.h); entries left
 // as nullptr are callbacks this provider does not implement.
 static const struct UTextFuncs kTextLatin1Funcs = {
     sizeof(UTextFuncs),  // tableSize
     0,                   // reserved
     0,                   // reserved
     0,                   // reserved
     TextClone,           // clone
     TextNativeLength,    // nativeLength
     TextLatin1Access,    // access
     TextExtract,         // extract (unsupported, see TextExtract)
     nullptr,
     nullptr,
     nullptr,
     nullptr,
     TextClose,  // close
     nullptr,
     nullptr,
     nullptr,
 };

 // Fills in the provider-specific fields of an already set-up |text|:
 //  - context and p both point at the primary |string|, a holds its |length|;
 //  - q points at |prior_context|, b holds |prior_context_length|;
 //  - pFuncs is the callback table and the provider advertises stable chunks.
 static void TextInit(UText* text,
                      const UTextFuncs* funcs,
                      const void* string,
                      unsigned length,
                      const UChar* prior_context,
                      int prior_context_length) {
   // Primary string.
   text->context = string;
   text->p = string;
   text->a = length;

   // Prior context.
   text->q = prior_context;
   text->b = prior_context_length;

   // Provider description.
   text->pFuncs = funcs;
   text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
 }

 // Opens a UText over the 8-bit |string| of |length| characters, optionally
 // preceded by |prior_context|. The buffer embedded in |ut_with_buffer| is
 // requested as the UText's extra storage. Returns nullptr if |status| already
 // holds a failure, if |string| is null or |length| exceeds the int32_t range
 // (U_ILLEGAL_ARGUMENT_ERROR), or if utext_setup() fails.
 static UText* TextOpenLatin1(UTextWithBuffer* ut_with_buffer,
                              const LChar* string,
                              unsigned length,
                              const UChar* prior_context,
                              int prior_context_length,
                              UErrorCode* status) {
   if (U_FAILURE(*status))
     return nullptr;

   const unsigned max_length =
       static_cast<unsigned>(std::numeric_limits<int32_t>::max());
   const bool arguments_valid = string && length <= max_length;
   if (!arguments_valid) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return nullptr;
   }

   UText* opened = utext_setup(&ut_with_buffer->text,
                               sizeof(ut_with_buffer->buffer), status);
   if (U_FAILURE(*status)) {
     DCHECK(!opened);
     return nullptr;
   }

   TextInit(opened, &kTextLatin1Funcs, string, length, prior_context,
            prior_context_length);
   return opened;
 }

 // Reports which context the current chunk of a UTF-16 text belongs to. No
 // chunk means kNoContext; a chunk that is the primary string itself (text->p)
 // is the primary context; anything else is the prior context.
 static inline TextContext TextUTF16GetCurrentContext(const UText* text) {
   const UChar* const chunk = text->chunkContents;
   if (!chunk)
     return kNoContext;
   if (chunk == text->p)
     return kPrimaryContext;
   return kPriorContext;
 }

 // Positions the chunk of a UTF-16 text inside the primary context. The chunk
 // always covers the whole primary string (native range
 // [text->b, native_length)), so only the chunk offset depends on
 // |native_index|. The chunk must already point at the primary string.
 static void TextUTF16MoveInPrimaryContext(UText* text,
                                           int64_t native_index,
                                           int64_t native_length,
                                           UBool forward) {
   DCHECK_EQ(text->chunkContents, text->p);
   DCHECK(forward ? native_index >= text->b : native_index > text->b);
   DCHECK(forward ? native_index < native_length
                  : native_index <= native_length);

   text->chunkNativeStart = text->b;
   text->chunkNativeLimit = native_length;

   const int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
   // Ensure chunk length is well defined if computed length exceeds int32_t
   // range.
   DCHECK_LE(length, std::numeric_limits<int32_t>::max());
   int32_t pinned_length = 0;
   if (length <= std::numeric_limits<int32_t>::max())
     pinned_length = static_cast<int32_t>(length);
   text->chunkLength = pinned_length;
   text->nativeIndexingLimit = text->chunkLength;

   const int64_t offset = native_index - text->chunkNativeStart;
   // Ensure chunk offset is well defined if computed offset exceeds int32_t
   // range or chunk length.
   DCHECK_LE(offset, std::numeric_limits<int32_t>::max());
   int32_t pinned_offset = 0;
   if (offset <= std::numeric_limits<int32_t>::max())
     pinned_offset = static_cast<int32_t>(offset);
   text->chunkOffset = std::min(pinned_offset, text->chunkLength);
 }

 // Makes the primary string the current chunk of a UTF-16 text (coming from no
 // chunk or from the prior context) and positions it at |native_index|.
 static void TextUTF16SwitchToPrimaryContext(UText* text,
                                             int64_t native_index,
                                             int64_t native_length,
                                             UBool forward) {
   DCHECK(!text->chunkContents || text->chunkContents == text->q);
   const UChar* const primary_chunk = static_cast<const UChar*>(text->p);
   text->chunkContents = primary_chunk;
   TextUTF16MoveInPrimaryContext(text, native_index, native_length, forward);
 }

 // Positions the chunk of a UTF-16 text inside the prior context. The chunk
 // always covers the whole prior context (native range [0, text->b)), so only
 // the chunk offset depends on |native_index|. The chunk must already point at
 // the prior context buffer (text->q).
 static void TextUTF16MoveInPriorContext(UText* text,
                                         int64_t native_index,
                                         int64_t native_length,
                                         UBool forward) {
   DCHECK_EQ(text->chunkContents, text->q);
   DCHECK(forward ? native_index < text->b : native_index <= text->b);
   DCHECK(forward ? native_index < native_length
                  : native_index <= native_length);
   text->chunkNativeStart = 0;
   text->chunkNativeLimit = text->b;
   text->chunkLength = text->b;
   text->nativeIndexingLimit = text->chunkLength;
   int64_t offset = native_index - text->chunkNativeStart;
   // Ensure chunk offset is well defined if computed offset exceeds int32_t
   // range or chunk length.
   DCHECK_LE(offset, std::numeric_limits<int32_t>::max());
   text->chunkOffset = std::min(offset <= std::numeric_limits<int32_t>::max()
                                    ? static_cast<int32_t>(offset)
                                    : 0,
                                text->chunkLength);
 }

 // Makes the prior context buffer the current chunk of a UTF-16 text (coming
 // from no chunk or from the primary string) and positions it at
 // |native_index|.
 static void TextUTF16SwitchToPriorContext(UText* text,
                                           int64_t native_index,
                                           int64_t native_length,
                                           UBool forward) {
   DCHECK(!text->chunkContents || text->chunkContents == text->p);
   const UChar* const prior_chunk = static_cast<const UChar*>(text->q);
   text->chunkContents = prior_chunk;
   TextUTF16MoveInPriorContext(text, native_index, native_length, forward);
 }

 // Access callback for UTF-16 texts. Makes |native_index| addressable through
 // the current chunk, switching between the prior context and the primary
 // string as needed. Returns FALSE for a closed text (null context) or when the
 // index is out of range and the chunk already sits at that end of the text;
 // TRUE otherwise.
 static UBool TextUTF16Access(UText* text, int64_t native_index, UBool forward) {
   if (!text->context)
     return FALSE;

   const int64_t native_length = TextNativeLength(text);
   UBool is_accessible;
   if (TextInChunkOrOutOfRange(text, native_index, native_length, forward,
                               is_accessible)) {
     return is_accessible;
   }

   // TextPinIndex clamps |native_index| in place.
   TextPinIndex(native_index, native_length - 1);
   const TextContext current_context = TextUTF16GetCurrentContext(text);
   const TextContext new_context = TextGetContext(text, native_index, forward);
   DCHECK_NE(new_context, kNoContext);

   if (new_context != current_context) {
     if (new_context == kPrimaryContext) {
       TextUTF16SwitchToPrimaryContext(text, native_index, native_length,
                                       forward);
     } else {
       DCHECK_EQ(new_context, kPriorContext);
       TextUTF16SwitchToPriorContext(text, native_index, native_length,
                                     forward);
     }
   } else if (current_context == kPrimaryContext) {
     TextUTF16MoveInPrimaryContext(text, native_index, native_length, forward);
   } else {
     TextUTF16MoveInPriorContext(text, native_index, native_length, forward);
   }
   return TRUE;
 }

 // Callback table for the UTF-16 text provider. Slot names in the trailing
 // comments follow ICU's UTextFuncs layout (see unicode/utext.h); entries left
 // as nullptr are callbacks this provider does not implement.
 static const struct UTextFuncs kTextUTF16Funcs = {
     sizeof(UTextFuncs),  // tableSize
     0,                   // reserved
     0,                   // reserved
     0,                   // reserved
     TextClone,           // clone
     TextNativeLength,    // nativeLength
     TextUTF16Access,     // access
     TextExtract,         // extract (unsupported, see TextExtract)
     nullptr,
     nullptr,
     nullptr,
     nullptr,
     TextClose,  // close
     nullptr,
     nullptr,
     nullptr,
 };

 // Opens |text| as a UText over the 16-bit |string| of |length| code units,
 // optionally preceded by |prior_context|. No extra storage is requested.
 // Returns nullptr if |status| already holds a failure, if |string| is null or
 // |length| exceeds the int32_t range (U_ILLEGAL_ARGUMENT_ERROR), or if
 // utext_setup() fails.
 static UText* TextOpenUTF16(UText* text,
                             const UChar* string,
                             unsigned length,
                             const UChar* prior_context,
                             int prior_context_length,
                             UErrorCode* status) {
   if (U_FAILURE(*status))
     return nullptr;

   const unsigned max_length =
       static_cast<unsigned>(std::numeric_limits<int32_t>::max());
   const bool arguments_valid = string && length <= max_length;
   if (!arguments_valid) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return nullptr;
   }

   text = utext_setup(text, 0, status);
   if (U_FAILURE(*status)) {
     DCHECK(!text);
     return nullptr;
   }

   TextInit(text, &kTextUTF16Funcs, string, length, prior_context,
            prior_context_length);
   return text;
 }

 // Pristine UText value; copied into stack-allocated UTextWithBuffer instances
 // in this file to initialize their |text| member before opening.
 static UText g_empty_text = UTEXT_INITIALIZER;

 // Returns the function-local word break iterator (created on first use for
 // the current text-break locale) with its text set to the 8-bit |string| of
 // |length| characters. Returns nullptr if the iterator cannot be created or
 // the Latin-1 UText cannot be opened. A failure of setText() is only logged;
 // the iterator is still returned in that case.
 static TextBreakIterator* WordBreakIterator(const LChar* string, int length) {
   UErrorCode error_code = U_ZERO_ERROR;
   static TextBreakIterator* break_iter = nullptr;
   if (!break_iter) {
     break_iter = icu::BreakIterator::createWordInstance(
         icu::Locale(CurrentTextBreakLocaleID()), error_code);
     DCHECK(U_SUCCESS(error_code))
         << "ICU could not open a break iterator: " << u_errorName(error_code)
         << " (" << error_code << ")";
     if (!break_iter)
       return nullptr;
   }

   // Stack UText whose extra storage is the embedded buffer.
   UTextWithBuffer text_local;
   text_local.text = g_empty_text;
   text_local.text.extraSize = sizeof(text_local.buffer);
   text_local.text.pExtra = text_local.buffer;

   UErrorCode open_status = U_ZERO_ERROR;
   UText* text =
       TextOpenLatin1(&text_local, string, length, nullptr, 0, &open_status);
   if (U_FAILURE(open_status)) {
     DLOG(ERROR) << "textOpenLatin1 failed with status " << open_status;
     return nullptr;
   }

   UErrorCode set_text_status = U_ZERO_ERROR;
   break_iter->setText(text, set_text_status);
   if (U_FAILURE(set_text_status))
     DLOG(ERROR) << "BreakIterator::setText failed with status "
                 << set_text_status;

   utext_close(text);

   return break_iter;
 }

 // Points |iter| at the UTF-16 |string| of |length| code units using ICU's
 // built-in UChar text provider. Does nothing if the UText cannot be opened.
 static void SetText16(TextBreakIterator* iter,
                       const UChar* string,
                       int length) {
   UErrorCode error_code = U_ZERO_ERROR;
   UText u_text = UTEXT_INITIALIZER;
   utext_openUChars(&u_text, string, length, &error_code);
   if (U_FAILURE(error_code))
     return;
   iter->setText(&u_text, error_code);
   // Close the local UText once it has been handed to the iterator, matching
   // the other setText() call sites in this file which close their stack UText
   // right after setText().
   utext_close(&u_text);
 }

 // Returns the function-local word break iterator (created on first use for
 // the current text-break locale) with its text set to the UTF-16 |string| of
 // |length| code units, or nullptr if the iterator cannot be created.
 TextBreakIterator* WordBreakIterator(const UChar* string, int length) {
   static TextBreakIterator* word_iterator = nullptr;
   if (!word_iterator) {
     UErrorCode error_code = U_ZERO_ERROR;
     word_iterator = icu::BreakIterator::createWordInstance(
         icu::Locale(CurrentTextBreakLocaleID()), error_code);
     DCHECK(U_SUCCESS(error_code))
         << "ICU could not open a break iterator: " << u_errorName(error_code)
         << " (" << error_code << ")";
     if (!word_iterator)
       return nullptr;
   }

   SetText16(word_iterator, string, length);
   return word_iterator;
 }

 // Returns a word break iterator over |length| characters of |string| starting
 // at |start|, dispatching on whether the string is stored as 8-bit or 16-bit.
 // Returns nullptr for an empty string.
 TextBreakIterator* WordBreakIterator(const String& string,
                                      int start,
                                      int length) {
   if (string.IsEmpty())
     return nullptr;
   return string.Is8Bit()
              ? WordBreakIterator(string.Characters8() + start, length)
              : WordBreakIterator(string.Characters16() + start, length);
 }

 // Takes a line break iterator for |locale| from the shared pool and points it
 // at the 8-bit |string| of |length| characters, preceded by |prior_context|.
 // Returns nullptr if no iterator is available or the text cannot be attached;
 // in the latter case the iterator is handed back to the pool so that it is
 // neither leaked nor left registered as vended. On success the caller must
 // return the iterator with ReleaseLineBreakIterator().
 TextBreakIterator* AcquireLineBreakIterator(const LChar* string,
                                             int length,
                                             const AtomicString& locale,
                                             const UChar* prior_context,
                                             unsigned prior_context_length) {
   LineBreakIteratorPool& pool = LineBreakIteratorPool::SharedPool();
   TextBreakIterator* iterator = pool.Take(locale);
   if (!iterator)
     return nullptr;

   // Stack UText whose extra storage is the embedded buffer.
   UTextWithBuffer text_local;
   text_local.text = g_empty_text;
   text_local.text.extraSize = sizeof(text_local.buffer);
   text_local.text.pExtra = text_local.buffer;

   UErrorCode open_status = U_ZERO_ERROR;
   UText* text = TextOpenLatin1(&text_local, string, length, prior_context,
                                prior_context_length, &open_status);
   if (U_FAILURE(open_status)) {
     DLOG(ERROR) << "textOpenLatin1 failed with status " << open_status;
     pool.Put(iterator);
     return nullptr;
   }

   UErrorCode set_text_status = U_ZERO_ERROR;
   iterator->setText(text, set_text_status);
   // The local UText is no longer needed once setText() has run, whether or
   // not it succeeded.
   utext_close(text);
   if (U_FAILURE(set_text_status)) {
     DLOG(ERROR) << "ubrk_setUText failed with status " << set_text_status;
     pool.Put(iterator);
     return nullptr;
   }

   return iterator;
 }

 // Takes a line break iterator for |locale| from the shared pool and points it
 // at the UTF-16 |string| of |length| code units, preceded by |prior_context|.
 // Returns nullptr if no iterator is available or the text cannot be attached;
 // in the latter case the iterator is handed back to the pool so that it is
 // neither leaked nor left registered as vended. On success the caller must
 // return the iterator with ReleaseLineBreakIterator().
 TextBreakIterator* AcquireLineBreakIterator(const UChar* string,
                                             int length,
                                             const AtomicString& locale,
                                             const UChar* prior_context,
                                             unsigned prior_context_length) {
   LineBreakIteratorPool& pool = LineBreakIteratorPool::SharedPool();
   TextBreakIterator* iterator = pool.Take(locale);
   if (!iterator)
     return nullptr;

   UText text_local = UTEXT_INITIALIZER;

   UErrorCode open_status = U_ZERO_ERROR;
   UText* text = TextOpenUTF16(&text_local, string, length, prior_context,
                               prior_context_length, &open_status);
   if (U_FAILURE(open_status)) {
     DLOG(ERROR) << "textOpenUTF16 failed with status " << open_status;
     pool.Put(iterator);
     return nullptr;
   }

   UErrorCode set_text_status = U_ZERO_ERROR;
   iterator->setText(text, set_text_status);
   // The local UText is no longer needed once setText() has run, whether or
   // not it succeeded.
   utext_close(text);
   if (U_FAILURE(set_text_status)) {
     DLOG(ERROR) << "ubrk_setUText failed with status " << set_text_status;
     pool.Put(iterator);
     return nullptr;
   }

   return iterator;
 }

 // Hands |iterator|, previously obtained from AcquireLineBreakIterator(), back
 // to the shared pool. |iterator| must not be null.
 void ReleaseLineBreakIterator(TextBreakIterator* iterator) {
   DCHECK(iterator);
   LineBreakIteratorPool& pool = LineBreakIteratorPool::SharedPool();
   pool.Put(iterator);
 }

 // Returns the character break iterator stored in a ThreadSpecific slot,
 // creating it for the current text-break locale on first use. Creation
 // failure is fatal (CHECK), so the result is never null.
 static TextBreakIterator* GetNonSharedCharacterBreakIterator() {
   DEFINE_THREAD_SAFE_STATIC_LOCAL(
       ThreadSpecific<std::unique_ptr<TextBreakIterator>>, thread_specific, ());

   std::unique_ptr<TextBreakIterator>& cached = *thread_specific;
   if (cached)
     return cached.get();

   ICUError error_code;
   cached = base::WrapUnique(icu::BreakIterator::createCharacterInstance(
       icu::Locale(CurrentTextBreakLocaleID()), error_code));
   CHECK(U_SUCCESS(error_code) && cached)
       << "ICU could not open a break iterator: " << u_errorName(error_code)
       << " (" << error_code << ")";

   DCHECK(cached);
   return cached.get();
 }

 // Builds a character break iterator over |string|. An empty string leaves the
 // object in its default 8-bit state with zero length. 8-bit strings are
 // walked by this class directly (only the character pointer and length are
 // recorded); 16-bit strings are delegated to the ICU iterator.
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
     const StringView& string)
     : is8_bit_(true),
       charaters8_(nullptr),
       offset_(0),
       length_(0),
       iterator_(nullptr) {
   if (string.IsEmpty())
     return;

   is8_bit_ = string.Is8Bit();
   if (!is8_bit_) {
     CreateIteratorForBuffer(string.Characters16(), string.length());
     return;
   }

   charaters8_ = string.Characters8();
   offset_ = 0;
   length_ = string.length();
 }

 // Builds a character break iterator over the UTF-16 |buffer| of |length| code
 // units. Always uses the ICU iterator (is8_bit_ is false).
 NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
     const UChar* buffer,
     unsigned length)
     : is8_bit_(false),
       charaters8_(nullptr),
       offset_(0),
       length_(0),
       iterator_(nullptr) {
   CreateIteratorForBuffer(buffer, length);
 }

 void NonSharedCharacterBreakIterator::CreateIteratorForBuffer(
     const UChar* buffer,
     unsigned length) {
   iterator_ = GetNonSharedCharacterBreakIterator();
   SetText16(iterator_, buffer, length);
 }

 // Nothing to release: iterator_ (when set) refers to the iterator owned by the
 // storage inside GetNonSharedCharacterBreakIterator(), and the 8-bit path only
 // borrows the caller's characters.
 NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator() {}

 // Advances to the next character boundary and returns it. For 8-bit text the
 // offset moves by the length of the cluster at the current offset, and
 // kTextBreakDone is returned once the end has been reached; 16-bit text is
 // delegated to the ICU iterator.
 int NonSharedCharacterBreakIterator::Next() {
   if (is8_bit_) {
     if (offset_ >= length_)
       return kTextBreakDone;
     offset_ += ClusterLengthStartingAt(offset_);
     return offset_;
   }
   return iterator_->next();
 }

 // Returns the current boundary position: the tracked offset for 8-bit text,
 // or the ICU iterator's current position for 16-bit text.
 int NonSharedCharacterBreakIterator::Current() {
   if (is8_bit_)
     return offset_;
   return iterator_->current();
 }

 // Returns whether |offset| is a character boundary. For 8-bit text every
 // offset is a boundary unless IsLFAfterCR(offset) holds; 16-bit text is
 // delegated to the ICU iterator.
 bool NonSharedCharacterBreakIterator::IsBreak(int offset) const {
   if (is8_bit_)
     return !IsLFAfterCR(offset);
   return iterator_->isBoundary(offset);
 }

 // Returns the boundary preceding |offset|. For 8-bit text this is
 // kTextBreakDone at or before the start, otherwise two positions back when
 // IsLFAfterCR(offset) holds and one position back in all other cases. 16-bit
 // text is delegated to the ICU iterator.
 int NonSharedCharacterBreakIterator::Preceding(int offset) const {
   if (is8_bit_) {
     if (offset <= 0)
       return kTextBreakDone;
     return IsLFAfterCR(offset) ? offset - 2 : offset - 1;
   }
   return iterator_->preceding(offset);
 }

 // Returns the boundary following |offset|. For 8-bit text this is
 // kTextBreakDone at or past the end, otherwise |offset| plus the length of
 // the cluster starting there. 16-bit text is delegated to the ICU iterator.
 int NonSharedCharacterBreakIterator::Following(int offset) const {
   if (is8_bit_) {
     const bool at_or_past_end = static_cast<unsigned>(offset) >= length_;
     if (at_or_past_end)
       return kTextBreakDone;
     return offset + ClusterLengthStartingAt(offset);
   }
   return iterator_->following(offset);
 }

 // Returns the function-local sentence break iterator (created on first use
 // for the current text-break locale) with its text set to the UTF-16 |string|
 // of |length| code units, or nullptr if the iterator cannot be created.
 TextBreakIterator* SentenceBreakIterator(const UChar* string, int length) {
   static TextBreakIterator* sentence_iterator = nullptr;
   if (!sentence_iterator) {
     UErrorCode open_status = U_ZERO_ERROR;
     sentence_iterator = icu::BreakIterator::createSentenceInstance(
         icu::Locale(CurrentTextBreakLocaleID()), open_status);
     DCHECK(U_SUCCESS(open_status))
         << "ICU could not open a break iterator: " << u_errorName(open_status)
         << " (" << open_status << ")";
     if (!sentence_iterator)
       return nullptr;
   }

   SetText16(sentence_iterator, string, length);
   return sentence_iterator;
 }

 // Returns true unless the rule status of |iterator|'s most recent boundary is
 // UBRK_WORD_NONE. |iterator| is treated as an icu::RuleBasedBreakIterator.
 bool IsWordTextBreak(TextBreakIterator* iterator) {
   auto* rule_based = static_cast<icu::RuleBasedBreakIterator*>(iterator);
   return rule_based->getRuleStatus() != UBRK_WORD_NONE;
 }

 TextBreakIterator* CursorMovementIterator(const UChar* string, int length) {
   // This rule set is based on character-break iterator rules of ICU 4.0
   // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
   // The major differences from the original ones are listed below:
   // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with
   //   '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
   // * Removed rules that prevent a cursor from moving after prepend characters
   //   (Bug 24342);
   // * Added rules that prevent a cursor from moving after virama signs of Indic
   //   languages except Tamil (Bug 15790), and;
   // * Added rules that prevent a cursor from moving before Japanese half-width
   //   katakana voiced marks.
   // * Added rules for regional indicator symbols.
   static const char* const kRules =
       "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
       "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
       "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
       "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced
                                          // marks
       "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 "
       "\\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
       "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
       "$L       = [\\p{Grapheme_Cluster_Break = L}];"
       "$V       = [\\p{Grapheme_Cluster_Break = V}];"
       "$T       = [\\p{Grapheme_Cluster_Break = T}];"
       "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
       "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
       "$Hin0    = [\\u0905-\\u0939];"          // Devanagari Letter A,...,Ha
       "$HinV    = \\u094D;"                    // Devanagari Sign Virama
       "$Hin1    = [\\u0915-\\u0939];"          // Devanagari Letter Ka,...,Ha
       "$Ben0    = [\\u0985-\\u09B9];"          // Bengali Letter A,...,Ha
       "$BenV    = \\u09CD;"                    // Bengali Sign Virama
       "$Ben1    = [\\u0995-\\u09B9];"          // Bengali Letter Ka,...,Ha
       "$Pan0    = [\\u0A05-\\u0A39];"          // Gurmukhi Letter A,...,Ha
       "$PanV    = \\u0A4D;"                    // Gurmukhi Sign Virama
       "$Pan1    = [\\u0A15-\\u0A39];"          // Gurmukhi Letter Ka,...,Ha
       "$Guj0    = [\\u0A85-\\u0AB9];"          // Gujarati Letter A,...,Ha
       "$GujV    = \\u0ACD;"                    // Gujarati Sign Virama
       "$Guj1    = [\\u0A95-\\u0AB9];"          // Gujarati Letter Ka,...,Ha
       "$Ori0    = [\\u0B05-\\u0B39];"          // Oriya Letter A,...,Ha
       "$OriV    = \\u0B4D;"                    // Oriya Sign Virama
       "$Ori1    = [\\u0B15-\\u0B39];"          // Oriya Letter Ka,...,Ha
       "$Tel0    = [\\u0C05-\\u0C39];"          // Telugu Letter A,...,Ha
       "$TelV    = \\u0C4D;"                    // Telugu Sign Virama
       "$Tel1    = [\\u0C14-\\u0C39];"          // Telugu Letter Ka,...,Ha
       "$Kan0    = [\\u0C85-\\u0CB9];"          // Kannada Letter A,...,Ha
       "$KanV    = \\u0CCD;"                    // Kannada Sign Virama
       "$Kan1    = [\\u0C95-\\u0CB9];"          // Kannada Letter A,...,Ha
       "$Mal0    = [\\u0D05-\\u0D39];"          // Malayalam Letter A,...,Ha
       "$MalV    = \\u0D4D;"                    // Malayalam Sign Virama
       "$Mal1    = [\\u0D15-\\u0D39];"          // Malayalam Letter A,...,Ha
       "$RI      = [\\U0001F1E6-\\U0001F1FF];"  // Emoji regional indicators
       "!!chain;"
       "!!forward;"
       "$CR $LF;"
       "$L ($L | $V | $LV | $LVT);"
       "($LV | $V) ($V | $T);"
       "($LVT | $T) $T;"
       "[^$Control $CR $LF] $Extend;"
       "[^$Control $CR $LF] $SpacingMark;"
       "$RI $RI / $RI;"
       "$RI $RI;"
       "$Hin0 $HinV $Hin1;"  // Devanagari Virama (forward)
       "$Ben0 $BenV $Ben1;"  // Bengali Virama (forward)
       "$Pan0 $PanV $Pan1;"  // Gurmukhi Virama (forward)
       "$Guj0 $GujV $Guj1;"  // Gujarati Virama (forward)
       "$Ori0 $OriV $Ori1;"  // Oriya Virama (forward)
       "$Tel0 $TelV $Tel1;"  // Telugu Virama (forward)
       "$Kan0 $KanV $Kan1;"  // Kannada Virama (forward)
       "$Mal0 $MalV $Mal1;"  // Malayalam Virama (forward)
       "!!reverse;"
       "$LF $CR;"
       "($L | $V | $LV | $LVT) $L;"
       "($V | $T) ($LV | $V);"
       "$T ($LVT | $T);"
       "$Extend      [^$Control $CR $LF];"
       "$SpacingMark [^$Control $CR $LF];"
       "$RI $RI / $RI $RI;"
       "$RI $RI;"
       "$Hin1 $HinV $Hin0;"  // Devanagari Virama (backward)
       "$Ben1 $BenV $Ben0;"  // Bengali Virama (backward)
       "$Pan1 $PanV $Pan0;"  // Gurmukhi Virama (backward)
       "$Guj1 $GujV $Guj0;"  // Gujarati Virama (backward)
       "$Ori1 $OriV $Ori0;"  // Gujarati Virama (backward)
       "$Tel1 $TelV $Tel0;"  // Telugu Virama (backward)
       "$Kan1 $KanV $Kan0;"  // Kannada Virama (backward)
       "$Mal1 $MalV $Mal0;"  // Malayalam Virama (backward)
       "!!safe_reverse;"
       "!!safe_forward;";

   if (!string)
     return nullptr;

   DEFINE_THREAD_SAFE_STATIC_LOCAL(
       ThreadSpecific<std::unique_ptr<icu::RuleBasedBreakIterator>>,
       thread_specific, ());

   std::unique_ptr<icu::RuleBasedBreakIterator>& iterator = *thread_specific;

   if (!iterator) {
     UParseError parse_status;
     UErrorCode open_status = U_ZERO_ERROR;
     // break_rules is ASCII. Pick the most efficient UnicodeString ctor.
     iterator = std::make_unique<icu::RuleBasedBreakIterator>(
         icu::UnicodeString(kRules, -1, US_INV), parse_status, open_status);
     DCHECK(U_SUCCESS(open_status))
         << "ICU could not open a break iterator: " << u_errorName(open_status)
         << " (" << open_status << ")";
   }

   SetText16(iterator.get(), string, length);
   return iterator.get();
 }

 }  // namespace blink