// third_party/blink/renderer/platform/wtf/text/wtf_string.h - chromium/src - Git at Google

 /*
  * (C) 1999 Lars Knoll (knoll@kde.org)
  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc.
  * All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_TEXT_WTF_STRING_H_
 #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_TEXT_WTF_STRING_H_

 // This file would be called String.h, but that conflicts with <string.h>
 // on systems without case-sensitive file systems.

 #include <iosfwd>
 #include "build/build_config.h"
 #include "third_party/blink/renderer/platform/wtf/allocator.h"
 #include "third_party/blink/renderer/platform/wtf/compiler.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_impl.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_view.h"
 #include "third_party/blink/renderer/platform/wtf/wtf_export.h"
 #include "third_party/blink/renderer/platform/wtf/wtf_size_t.h"

 #ifdef __OBJC__
 #include <objc/objc.h>
 #endif

 namespace WTF {

 class CString;
 struct StringHash;

 // Conversion policy accepted by String::Utf8().
 // NOTE(review): the precise handling performed by each mode is implemented
 // outside this header -- confirm against the conversion code before relying
 // on the difference between the lenient and strict variants.
 enum UTF8ConversionMode {
   kLenientUTF8Conversion = 0,
   kStrictUTF8Conversion = 1,
   kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD = 2,
 };

 // Expands to a call of |op|, |op|IgnoringASCIICase or |op|IgnoringCase with
 // the parenthesized argument list |args|, selected by |caseSensitivity|:
 //   kTextCaseSensitive        -> op args
 //   kTextCaseASCIIInsensitive -> op##IgnoringASCIICase args
 //   anything else             -> op##IgnoringCase args
 // |caseSensitivity| is parenthesized in the expansion so that an expression
 // argument (e.g. a conditional expression) is compared as a whole. It can be
 // evaluated twice, so it must be free of side effects.
 #define DISPATCH_CASE_OP(caseSensitivity, op, args)       \
   (((caseSensitivity) == kTextCaseSensitive)              \
        ? op args                                          \
        : ((caseSensitivity) == kTextCaseASCIIInsensitive) \
              ? op##IgnoringASCIICase args                 \
              : op##IgnoringCase args)

 // String is a handle around a scoped_refptr<StringImpl>. A String whose
 // |impl_| is null is the "null string"; it is distinguishable from an empty
 // (zero-length) string via IsNull(), although most accessors below treat a
 // null string like an empty one.
 //
 // You can find documentation about this class in this doc:
 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
 class WTF_EXPORT String {
   USING_FAST_MALLOC(String);

  public:
   // Construct a null string, distinguishable from an empty string.
   String() = default;

   // Construct a string with UTF-16 data.
   String(const UChar* characters, unsigned length);

   // Construct a string by copying the contents of a vector.
   // This method will never create a null string. Vectors with size() == 0
   // will return the empty string.
   // NOTE: This is different from String(vector.data(), vector.size())
   // which will sometimes return a null string when vector.data() is null
   // which can only occur for vectors without inline capacity.
   // See: https://bugs.webkit.org/show_bug.cgi?id=109792
   template <wtf_size_t inlineCapacity>
   explicit String(const Vector<UChar, inlineCapacity>&);

   // Construct a string with UTF-16 data, from a null-terminated source.
   String(const UChar*);
   String(const char16_t* chars)
       : String(reinterpret_cast<const UChar*>(chars)) {}

   // Construct a string with latin1 data.
   String(const LChar* characters, unsigned length);
   String(const char* characters, unsigned length);

 #if defined(ARCH_CPU_64_BITS)
   // Only define a size_t constructor if size_t is 64 bit otherwise
   // we'd have a duplicate define.
   String(const char* characters, size_t length);
 #endif  // defined(ARCH_CPU_64_BITS)

   // Construct a string with latin1 data, from a null-terminated source.
   // A null |characters| pointer is forwarded with a length of 0.
   String(const LChar* characters)
       : String(reinterpret_cast<const char*>(characters)) {}
   String(const char* characters)
       : String(characters, characters ? strlen(characters) : 0) {}

   // Construct a string referencing an existing StringImpl.
   String(StringImpl* impl) : impl_(impl) {}
   String(scoped_refptr<StringImpl> impl) : impl_(std::move(impl)) {}

   // Exchanges the underlying StringImpl references of the two strings.
   void swap(String& o) { impl_.swap(o.impl_); }

   // Builds a String from |buffer| via buffer.Release(). A zero-length buffer
   // yields StringImpl::empty_ rather than releasing the buffer.
   template <typename CharType>
   static String Adopt(StringBuffer<CharType>& buffer) {
     if (!buffer.length())
       return StringImpl::empty_;
     return String(buffer.Release());
   }

   // True for every non-null string, including the empty string.
   explicit operator bool() const { return !IsNull(); }
   bool IsNull() const { return !impl_; }
   // True for both the null string and zero-length strings.
   bool IsEmpty() const { return !impl_ || !impl_->length(); }

   // Impl() returns the raw StringImpl pointer (null for a null string).
   // ReleaseImpl() hands the reference to the caller by moving |impl_| out.
   StringImpl* Impl() const { return impl_.get(); }
   scoped_refptr<StringImpl> ReleaseImpl() { return std::move(impl_); }

   // Length as reported by the StringImpl; 0 for a null string.
   unsigned length() const {
     if (!impl_)
       return 0;
     return impl_->length();
   }

   // Character data of an 8-bit string; nullptr for a null string. DCHECKs
   // that the string really is 8-bit.
   const LChar* Characters8() const {
     if (!impl_)
       return nullptr;
     DCHECK(impl_->Is8Bit());
     return impl_->Characters8();
   }

   // Character data of a 16-bit string; nullptr for a null string. DCHECKs
   // that the string really is 16-bit.
   const UChar* Characters16() const {
     if (!impl_)
       return nullptr;
     DCHECK(!impl_->Is8Bit());
     return impl_->Characters16();
   }

   // Untyped pointer to the character data; nullptr for a null string.
   ALWAYS_INLINE const void* Bytes() const {
     if (!impl_)
       return nullptr;
     return impl_->Bytes();
   }

   // Return Characters8() or Characters16() depending on CharacterType.
   template <typename CharacterType>
   inline const CharacterType* GetCharacters() const;

   // Whether the characters are stored as 8-bit (LChar) rather than 16-bit
   // (UChar) data. Must not be called on a null string: unlike the accessors
   // above there is no null fallback and |impl_| is dereferenced.
   bool Is8Bit() const {
     DCHECK(impl_);
     return impl_->Is8Bit();
   }

   CString Ascii() const;
   CString Latin1() const;
   CString Utf8(UTF8ConversionMode = kLenientUTF8Conversion) const;

   // Returns the code unit at |index|, or 0 when the string is null or |index|
   // is out of range.
   UChar operator[](unsigned index) const {
     if (!impl_ || index >= impl_->length())
       return 0;
     return (*impl_)[index];
   }

   static String Number(int);
   static String Number(unsigned);
   static String Number(long);
   static String Number(unsigned long);
   static String Number(long long);
   static String Number(unsigned long long);

   static String Number(double, unsigned precision = 6);

   // Number to String conversion following the ECMAScript definition.
   static String NumberToStringECMAScript(double);
   static String NumberToStringFixedWidth(double, unsigned decimal_places);

   // Find characters. All find functions return kNotFound for a null string.
   wtf_size_t find(UChar c, unsigned start = 0) const {
     return impl_ ? impl_->Find(c, start) : kNotFound;
   }
   wtf_size_t find(LChar c, unsigned start = 0) const {
     return impl_ ? impl_->Find(c, start) : kNotFound;
   }
   wtf_size_t find(char c, unsigned start = 0) const {
     return find(static_cast<LChar>(c), start);
   }
   wtf_size_t Find(CharacterMatchFunctionPtr match_function,
                   unsigned start = 0) const {
     return impl_ ? impl_->Find(match_function, start) : kNotFound;
   }

   // Find substrings.
   wtf_size_t Find(
       const StringView& value,
       unsigned start = 0,
       TextCaseSensitivity case_sensitivity = kTextCaseSensitive) const {
     return impl_
                ? DISPATCH_CASE_OP(case_sensitivity, impl_->Find, (value, start))
                : kNotFound;
   }

   // Unicode aware case insensitive string matching. Non-ASCII characters might
   // match to ASCII characters. This function is rarely used to implement web
   // platform features.
   wtf_size_t FindIgnoringCase(const StringView& value,
                               unsigned start = 0) const {
     return impl_ ? impl_->FindIgnoringCase(value, start) : kNotFound;
   }

   // ASCII case insensitive string matching.
   wtf_size_t FindIgnoringASCIICase(const StringView& value,
                                    unsigned start = 0) const {
     return impl_ ? impl_->FindIgnoringASCIICase(value, start) : kNotFound;
   }

   bool Contains(char c) const { return find(c) != kNotFound; }
   bool Contains(
       const StringView& value,
       TextCaseSensitivity case_sensitivity = kTextCaseSensitive) const {
     return Find(value, 0, case_sensitivity) != kNotFound;
   }

   // Find the last instance of a single character or string.
   wtf_size_t ReverseFind(UChar c, unsigned start = UINT_MAX) const {
     return impl_ ? impl_->ReverseFind(c, start) : kNotFound;
   }
   wtf_size_t ReverseFind(const StringView& value,
                          unsigned start = UINT_MAX) const {
     return impl_ ? impl_->ReverseFind(value, start) : kNotFound;
   }

   UChar32 CharacterStartingAt(unsigned) const;

   // Prefix tests. A null string only "starts with" an empty prefix, and never
   // starts with a single character.
   bool StartsWith(
       const StringView& prefix,
       TextCaseSensitivity case_sensitivity = kTextCaseSensitive) const {
     return impl_
                ? DISPATCH_CASE_OP(case_sensitivity, impl_->StartsWith, (prefix))
                : prefix.IsEmpty();
   }
   bool StartsWithIgnoringCase(const StringView& prefix) const {
     return impl_ ? impl_->StartsWithIgnoringCase(prefix) : prefix.IsEmpty();
   }
   bool StartsWithIgnoringASCIICase(const StringView& prefix) const {
     return impl_ ? impl_->StartsWithIgnoringASCIICase(prefix)
                  : prefix.IsEmpty();
   }
   bool StartsWith(UChar character) const {
     return impl_ ? impl_->StartsWith(character) : false;
   }

   // Suffix tests. A null string only "ends with" an empty suffix, and never
   // ends with a single character.
   bool EndsWith(
       const StringView& suffix,
       TextCaseSensitivity case_sensitivity = kTextCaseSensitive) const {
     return impl_ ? DISPATCH_CASE_OP(case_sensitivity, impl_->EndsWith, (suffix))
                  : suffix.IsEmpty();
   }
   bool EndsWithIgnoringCase(const StringView& suffix) const {
     return impl_ ? impl_->EndsWithIgnoringCase(suffix) : suffix.IsEmpty();
   }
   bool EndsWithIgnoringASCIICase(const StringView& suffix) const {
     return impl_ ? impl_->EndsWithIgnoringASCIICase(suffix) : suffix.IsEmpty();
   }
   bool EndsWith(UChar character) const {
     return impl_ ? impl_->EndsWith(character) : false;
   }

   void append(const StringView&);
   void append(LChar);
   void append(char c) { append(static_cast<LChar>(c)); }
   void append(UChar);
   void insert(const StringView&, unsigned pos);

   // TODO(esprehn): Replace strangely both modifies this String *and* returns
   // a value. It should only do one of those.
   // All Replace overloads leave a null string untouched.
   String& Replace(UChar pattern, UChar replacement) {
     if (impl_)
       impl_ = impl_->Replace(pattern, replacement);
     return *this;
   }
   String& Replace(UChar pattern, const StringView& replacement) {
     if (impl_)
       impl_ = impl_->Replace(pattern, replacement);
     return *this;
   }
   String& Replace(const StringView& pattern, const StringView& replacement) {
     if (impl_)
       impl_ = impl_->Replace(pattern, replacement);
     return *this;
   }
   String& replace(unsigned index,
                   unsigned length_to_replace,
                   const StringView& replacement) {
     if (impl_)
       impl_ = impl_->Replace(index, length_to_replace, replacement);
     return *this;
   }

   // Replaces |impl_| with the result of StringImpl::Fill(c); no-op on a null
   // string.
   void Fill(UChar c) {
     if (impl_)
       impl_ = impl_->Fill(c);
   }

   void Ensure16Bit();

   void Truncate(unsigned length);
   void Remove(unsigned start, unsigned length = 1);

   String Substring(unsigned pos, unsigned len = UINT_MAX) const;
   // The first |len| characters.
   String Left(unsigned len) const { return Substring(0, len); }
   // The last |len| characters. When |len| is not smaller than length() this
   // makes the same call as Left(len); the start offset is clamped to 0 because
   // |length() - len| would otherwise wrap around in unsigned arithmetic.
   String Right(unsigned len) const {
     const unsigned total = length();
     return Substring(len < total ? total - len : 0, len);
   }

   // Returns a lowercase version of the string. This function might convert
   // non-ASCII characters to ASCII characters. For example, DeprecatedLower()
   // for U+212A is 'k'.
   // This function is rarely used to implement web platform features. See
   // crbug.com/627682.
   // This function is deprecated. We should use LowerASCII() or introduce
   // LowerUnicode().
   String DeprecatedLower() const;

   // |locale_identifier| is case-insensitive, and accepts either of "aa_aa" or
   // "aa-aa". Empty/null |locale_identifier| indicates locale-independent
   // Unicode case conversion.
   String LowerUnicode(const AtomicString& locale_identifier) const;
   String UpperUnicode(const AtomicString& locale_identifier) const;

   // Returns a lowercase version of the string.
   // This function converts ASCII characters only.
   String LowerASCII() const;
   // Returns an uppercase version of the string.
   // This function converts ASCII characters only.
   String UpperASCII() const;

   String StripWhiteSpace() const;
   String StripWhiteSpace(IsWhiteSpaceFunctionPtr) const;
   String SimplifyWhiteSpace(StripBehavior = kStripExtraWhiteSpace) const;
   String SimplifyWhiteSpace(IsWhiteSpaceFunctionPtr,
                             StripBehavior = kStripExtraWhiteSpace) const;

   String RemoveCharacters(CharacterMatchFunctionPtr) const;
   template <bool isSpecialCharacter(UChar)>
   bool IsAllSpecialCharacters() const;

   // Return the string with case folded for case insensitive comparison.
   String FoldCase() const;

   // Takes a printf format and args and prints into a String.
   // This function supports Latin-1 characters only.
   PRINTF_FORMAT(1, 2) static String Format(const char* format, ...);

   // Returns a version suitable for gtest and base/logging.*.  It prepends and
   // appends double-quotes, and escapes characters other than ASCII printables.
   String EncodeForDebugging() const;

   // Returns an uninitialized string. The characters need to be written
   // into the buffer returned in |data| before the returned string is used.
   // Failure to do this will have unpredictable results.
   static String CreateUninitialized(unsigned length, UChar*& data) {
     return StringImpl::CreateUninitialized(length, data);
   }
   static String CreateUninitialized(unsigned length, LChar*& data) {
     return StringImpl::CreateUninitialized(length, data);
   }

   // Split the string at |separator| into |result|. The overloads without
   // |allow_empty_entries| pass false for it.
   void Split(const StringView& separator,
              bool allow_empty_entries,
              Vector<String>& result) const;
   void Split(const StringView& separator, Vector<String>& result) const {
     Split(separator, false, result);
   }
   void Split(UChar separator,
              bool allow_empty_entries,
              Vector<String>& result) const;
   void Split(UChar separator, Vector<String>& result) const {
     Split(separator, false, result);
   }

   // Copy characters out of the string. See StringImpl.h for detailed docs.
   // Returns 0 without touching |buffer| for a null string.
   unsigned CopyTo(UChar* buffer, unsigned start, unsigned max_length) const {
     return impl_ ? impl_->CopyTo(buffer, start, max_length) : 0;
   }
   template <typename BufferType>
   void AppendTo(BufferType&,
                 unsigned start = 0,
                 unsigned length = UINT_MAX) const;
   template <typename BufferType>
   void PrependTo(BufferType&,
                  unsigned start = 0,
                  unsigned length = UINT_MAX) const;

   // Convert the string into a number.

   // The following ToFooStrict functions accept:
   //  - leading '+'
   //  - leading Unicode whitespace
   //  - trailing Unicode whitespace
   //  - no "-0" (ToUIntStrict and ToUInt64Strict)
   //  - no out-of-range numbers which the resultant type can't represent
   //
   // If the input string is not acceptable, 0 is returned and |*ok| becomes
   // |false|.
   //
   // We can use these functions to implement a Web Platform feature only if the
   // input string is already valid according to the specification of the
   // feature.
   int ToIntStrict(bool* ok = nullptr) const;
   unsigned ToUIntStrict(bool* ok = nullptr) const;
   unsigned HexToUIntStrict(bool* ok) const;
   int64_t ToInt64Strict(bool* ok = nullptr) const;
   uint64_t ToUInt64Strict(bool* ok = nullptr) const;

   // The following ToFoo functions accept:
   //  - leading '+'
   //  - leading Unicode whitespace
   //  - trailing garbage
   //  - no "-0" (ToUInt and ToUInt64)
   //  - no out-of-range numbers which the resultant type can't represent
   //
   // If the input string is not acceptable, 0 is returned and |*ok| becomes
   // |false|.
   //
   // We can use these functions to implement a Web Platform feature only if the
   // input string is already valid according to the specification of the
   // feature.
   int ToInt(bool* ok = nullptr) const;
   unsigned ToUInt(bool* ok = nullptr) const;

   // These functions accept:
   //  - leading '+'
   //  - numbers without leading zeros such as ".5"
   //  - numbers ending with "." such as "3."
   //  - scientific notation
   //  - leading whitespace (IsASCIISpace, not IsHTMLSpace)
   //  - no trailing whitespace
   //  - no trailing garbage
   //  - no numbers such as "NaN" "Infinity"
   //
   // A huge absolute number which a double/float can't represent is accepted,
   // and +Infinity or -Infinity is returned.
   //
   // A small absolute number which a double/float can't represent is accepted,
   // and 0 is returned.
   //
   // If the input string is not acceptable, 0.0 is returned and |*ok| becomes
   // |false|.
   //
   // We can use these functions to implement a Web Platform feature only if the
   // input string is already valid according to the specification of the
   // feature.
   //
   // FIXME: Like the strict functions above, these give false for "ok" when
   // there is trailing garbage.  Like the non-strict functions above, these
   // return the value when there is trailing garbage.  It would be better if
   // these were more consistent with the above functions instead.
   double ToDouble(bool* ok = nullptr) const;
   float ToFloat(bool* ok = nullptr) const;

   String IsolatedCopy() const;
   bool IsSafeToSendToAnotherThread() const;

 #ifdef __OBJC__
   String(NSString*);

   // This conversion maps null string to "", which loses the meaning of null
   // string, but we need this mapping because AppKit crashes when passed nil
   // NSStrings.
   operator NSString*() const {
     if (!impl_)
       return @"";
     return *impl_;
   }
 #endif

   static String Make8BitFrom16BitSource(const UChar*, wtf_size_t);
   template <wtf_size_t inlineCapacity>
   static String Make8BitFrom16BitSource(
       const Vector<UChar, inlineCapacity>& buffer) {
     return Make8BitFrom16BitSource(buffer.data(), buffer.size());
   }

   static String Make16BitFrom8BitSource(const LChar*, wtf_size_t);

   // String::FromUTF8 will return a null string if
   // the input data contains invalid UTF-8 sequences.
   static String FromUTF8(const LChar*, size_t);
   static String FromUTF8(const LChar*);
   static String FromUTF8(const char* s, size_t length) {
     return FromUTF8(reinterpret_cast<const LChar*>(s), length);
   }
   static String FromUTF8(const char* s) {
     return FromUTF8(reinterpret_cast<const LChar*>(s));
   }
   static String FromUTF8(const CString&);

   // Tries to convert the passed in string to UTF-8, but will fall back to
   // Latin-1 if the string is not valid UTF-8.
   static String FromUTF8WithLatin1Fallback(const LChar*, size_t);
   static String FromUTF8WithLatin1Fallback(const char* s, size_t length) {
     return FromUTF8WithLatin1Fallback(reinterpret_cast<const LChar*>(s),
                                       length);
   }

   // Content predicates; each of them is true for a null string.
   bool ContainsOnlyASCII() const {
     return !impl_ || impl_->ContainsOnlyASCII();
   }
   bool ContainsOnlyLatin1() const;
   bool ContainsOnlyWhitespace() const {
     return !impl_ || impl_->ContainsOnlyWhitespace();
   }

   // Size of the character data as reported by the StringImpl; 0 for a null
   // string.
   size_t CharactersSizeInBytes() const {
     return impl_ ? impl_->CharactersSizeInBytes() : 0;
   }

 #ifndef NDEBUG
   // For use in the debugger.
   void Show() const;
 #endif

  private:
   friend struct HashTraits<String>;

   template <typename CharacterType>
   void AppendInternal(CharacterType);

   // The underlying string data; null for the null string.
   scoped_refptr<StringImpl> impl_;
 };

 #undef DISPATCH_CASE_OP

 // String == String. Compares through Equal() on the raw StringImpl pointers
 // rather than EqualStringView(), because we want the isAtomic() fast path
 // inside Equal().
 inline bool operator==(const String& a, const String& b) {
   StringImpl* const lhs = a.Impl();
   StringImpl* const rhs = b.Impl();
   return Equal(lhs, rhs);
 }

 // String == C string, delegated to EqualStringView().
 inline bool operator==(const String& a, const char* b) {
   return EqualStringView(a, b);
 }

 // C string == String; mirrors the overload above with the operands swapped.
 inline bool operator==(const char* a, const String& b) {
   return b == a;
 }

 // Inequality operators; each one is the negation of the matching operator==.
 inline bool operator!=(const String& a, const String& b) {
   const bool same = (a == b);
   return !same;
 }
 inline bool operator!=(const String& a, const char* b) {
   const bool same = (a == b);
   return !same;
 }
 inline bool operator!=(const char* a, const String& b) {
   const bool same = (a == b);
   return !same;
 }

 inline bool EqualIgnoringNullity(const String& a, const String& b) {
   return EqualIgnoringNullity(a.Impl(), b.Impl());
 }

 template <wtf_size_t inlineCapacity>
 inline bool EqualIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
                                  const String& b) {
   return EqualIgnoringNullity(a, b.Impl());
 }

 // Non-member swap for String; forwards to String::swap().
 inline void swap(String& a, String& b) { a.swap(b); }

 // Definitions of string operations

 // Copies the contents of |vector| into a new string. An empty vector maps to
 // StringImpl::empty_, so the result is never a null string.
 template <wtf_size_t inlineCapacity>
 String::String(const Vector<UChar, inlineCapacity>& vector)
     : impl_(!vector.size()
                 ? StringImpl::empty_
                 : StringImpl::Create(vector.data(), vector.size())) {}

 // LChar flavour: the string is expected to be 8-bit (checked with DCHECK
 // before the data is fetched).
 template <>
 inline const LChar* String::GetCharacters<LChar>() const {
   DCHECK(Is8Bit());
   const LChar* const data8 = Characters8();
   return data8;
 }

 // UChar flavour: the string is expected to be 16-bit (checked with DCHECK
 // before the data is fetched).
 template <>
 inline const UChar* String::GetCharacters<UChar>() const {
   DCHECK(!Is8Bit());
   const UChar* const data16 = Characters16();
   return data16;
 }

 inline bool String::ContainsOnlyLatin1() const {
   if (IsEmpty())
     return true;

   if (Is8Bit())
     return true;

   const UChar* characters = Characters16();
   UChar ored = 0;
   for (wtf_size_t i = 0; i < impl_->length(); ++i)
     ored |= characters[i];
   return !(ored & 0xFF00);
 }

 #ifdef __OBJC__
 // Some long-standing WebKit call sites expect "nil if empty"; this helper
 // keeps that behavior for those entrenched clients. Null and empty Strings
 // both yield nil, anything else goes through String's NSString* conversion.
 inline NSString* NsStringNilIfEmpty(const String& str) {
   if (str.IsEmpty())
     return nil;
   return static_cast<NSString*>(str);
 }
 #endif

 WTF_EXPORT int CodePointCompare(const String&, const String&);

 // "Less than" predicate built on CodePointCompare(): true when comparing the
 // two underlying StringImpl pointers yields a negative result.
 inline bool CodePointCompareLessThan(const String& a, const String& b) {
   const int order = CodePointCompare(a.Impl(), b.Impl());
   return order < 0;
 }

 WTF_EXPORT int CodePointCompareIgnoringASCIICase(const String&, const char*);

 template <bool isSpecialCharacter(UChar)>
 inline bool String::IsAllSpecialCharacters() const {
   return StringView(*this).IsAllSpecialCharacters<isSpecialCharacter>();
 }

 // Forwards to StringImpl::AppendTo() with the given |position| and |length|.
 // A null string leaves |result| untouched.
 template <typename BufferType>
 void String::AppendTo(BufferType& result,
                       unsigned position,
                       unsigned length) const {
   if (impl_)
     impl_->AppendTo(result, position, length);
 }

 // Forwards to StringImpl::PrependTo() with the given |position| and |length|.
 // A null string leaves |result| untouched.
 template <typename BufferType>
 void String::PrependTo(BufferType& result,
                        unsigned position,
                        unsigned length) const {
   if (impl_)
     impl_->PrependTo(result, position, length);
 }

 // Makes StringHash the default hash for String by specializing DefaultHash.
 template <typename T>
 struct DefaultHash;
 template <>
 struct DefaultHash<String> {
   using Hash = StringHash;
 };

 // Shared global String constants; their definitions live outside this header.
 WTF_EXPORT extern const String& g_empty_string;
 WTF_EXPORT extern const String& g_empty_string16_bit;
 WTF_EXPORT extern const String& g_xmlns_with_colon;

 // Pretty printer for gtest and base/logging.*.  It prepends and appends
 // double-quotes, and escapes characters other than ASCII printables.
 WTF_EXPORT std::ostream& operator<<(std::ostream&, const String&);

 // StringView constructors taking a String. They are defined here, where
 // String is a complete type, and each delegates to the StringView constructor
 // that takes the String's StringImpl pointer.
 inline StringView::StringView(const String& string)
     : StringView(string.Impl()) {}
 inline StringView::StringView(const String& string, unsigned offset)
     : StringView(string.Impl(), offset) {}
 inline StringView::StringView(const String& string,
                               unsigned offset,
                               unsigned length)
     : StringView(string.Impl(), offset, length) {}

 }  // namespace WTF

 WTF_ALLOW_MOVE_AND_INIT_WITH_MEM_FUNCTIONS(String);

 using WTF::CString;
 using WTF::kStrictUTF8Conversion;
 using WTF::kStrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD;
 using WTF::String;
 using WTF::g_empty_string;
 using WTF::g_empty_string16_bit;
 using WTF::Equal;
 using WTF::Find;
 using WTF::IsSpaceOrNewline;

 #include "third_party/blink/renderer/platform/wtf/text/atomic_string.h"
 #endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_WTF_TEXT_WTF_STRING_H_