blob: 354001358ba11f6bde98c02a52e155ac3e3f3d57 [file] [log] [blame]
/*
* (C) 1999 Lars Knoll (knoll@kde.org)
* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 Apple Inc.
* All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef WTFString_h
#define WTFString_h
// This file would be called String.h, but that conflicts with <string.h>
// on systems without case-sensitive file systems.
#include "wtf/Allocator.h"
#include "wtf/HashTableDeletedValueType.h"
#include "wtf/WTFExport.h"
#include "wtf/text/ASCIIFastPath.h"
#include "wtf/text/StringImpl.h"
#include "wtf/text/StringView.h"
#include <algorithm>
#include <iosfwd>
#ifdef __OBJC__
#include <objc/objc.h>
#endif
namespace WTF {
class CString;
struct StringHash;
enum UTF8ConversionMode {
LenientUTF8Conversion,
StrictUTF8Conversion,
StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD
};
#define DISPATCH_CASE_OP(caseSensitivity, op, args) \
((caseSensitivity == TextCaseSensitive) \
? op args \
: (caseSensitivity == TextCaseASCIIInsensitive) \
? op##IgnoringASCIICase args \
: op##IgnoringCase args)
template <bool isSpecialCharacter(UChar), typename CharacterType>
bool isAllSpecialCharacters(const CharacterType*, size_t);
// You can find documentation about this class in this doc:
// https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
class WTF_EXPORT String {
USING_FAST_MALLOC(String);
public:
// Construct a null string, distinguishable from an empty string.
String() {}
// Construct a string with UTF-16 data.
String(const UChar* characters, unsigned length);
// Construct a string by copying the contents of a vector.
// This method will never create a null string. Vectors with size() == 0
// will return the empty string.
// NOTE: This is different from String(vector.data(), vector.size())
// which will sometimes return a null string when vector.data() is null
// which can only occur for vectors without inline capacity.
// See: https://bugs.webkit.org/show_bug.cgi?id=109792
template <size_t inlineCapacity>
explicit String(const Vector<UChar, inlineCapacity>&);
// Construct a string with UTF-16 data, from a null-terminated source.
String(const UChar*);
// Construct a string with latin1 data.
String(const LChar* characters, unsigned length);
String(const char* characters, unsigned length);
// Construct a string with latin1 data, from a null-terminated source.
String(const LChar* characters);
String(const char* characters);
// Construct a string referencing an existing StringImpl.
String(StringImpl* impl) : m_impl(impl) {}
String(PassRefPtr<StringImpl> impl) : m_impl(impl) {}
void swap(String& o) { m_impl.swap(o.m_impl); }
template <typename CharType>
static String adopt(StringBuffer<CharType>& buffer) {
if (!buffer.length())
return StringImpl::empty();
return String(buffer.release());
}
bool isNull() const { return !m_impl; }
bool isEmpty() const { return !m_impl || !m_impl->length(); }
StringImpl* impl() const { return m_impl.get(); }
PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); }
unsigned length() const {
if (!m_impl)
return 0;
return m_impl->length();
}
const LChar* characters8() const {
if (!m_impl)
return 0;
ASSERT(m_impl->is8Bit());
return m_impl->characters8();
}
const UChar* characters16() const {
if (!m_impl)
return 0;
ASSERT(!m_impl->is8Bit());
return m_impl->characters16();
}
// Return characters8() or characters16() depending on CharacterType.
template <typename CharacterType>
inline const CharacterType* getCharacters() const;
bool is8Bit() const { return m_impl->is8Bit(); }
CString ascii() const;
CString latin1() const;
CString utf8(UTF8ConversionMode = LenientUTF8Conversion) const;
UChar operator[](unsigned index) const {
if (!m_impl || index >= m_impl->length())
return 0;
return (*m_impl)[index];
}
static String number(int);
static String number(unsigned);
static String number(long);
static String number(unsigned long);
static String number(long long);
static String number(unsigned long long);
static String number(double, unsigned precision = 6);
// Number to String conversion following the ECMAScript definition.
static String numberToStringECMAScript(double);
static String numberToStringFixedWidth(double, unsigned decimalPlaces);
// Find characters.
size_t find(UChar c, unsigned start = 0) const {
return m_impl ? m_impl->find(c, start) : kNotFound;
}
size_t find(LChar c, unsigned start = 0) const {
return m_impl ? m_impl->find(c, start) : kNotFound;
}
size_t find(char c, unsigned start = 0) const {
return find(static_cast<LChar>(c), start);
}
size_t find(CharacterMatchFunctionPtr matchFunction,
unsigned start = 0) const {
return m_impl ? m_impl->find(matchFunction, start) : kNotFound;
}
// Find substrings.
size_t find(const StringView& value,
unsigned start = 0,
TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
return m_impl
? DISPATCH_CASE_OP(caseSensitivity, m_impl->find, (value, start))
: kNotFound;
}
// Unicode aware case insensitive string matching.
size_t findIgnoringCase(const StringView& value, unsigned start = 0) const {
return m_impl ? m_impl->findIgnoringCase(value, start) : kNotFound;
}
// ASCII case insensitive string matching.
size_t findIgnoringASCIICase(const StringView& value,
unsigned start = 0) const {
return m_impl ? m_impl->findIgnoringASCIICase(value, start) : kNotFound;
}
bool contains(char c) const { return find(c) != kNotFound; }
bool contains(const StringView& value,
TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
return find(value, 0, caseSensitivity) != kNotFound;
}
// Find the last instance of a single character or string.
size_t reverseFind(UChar c, unsigned start = UINT_MAX) const {
return m_impl ? m_impl->reverseFind(c, start) : kNotFound;
}
size_t reverseFind(const StringView& value, unsigned start = UINT_MAX) const {
return m_impl ? m_impl->reverseFind(value, start) : kNotFound;
}
UChar32 characterStartingAt(unsigned) const;
bool startsWith(
const StringView& prefix,
TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
return m_impl
? DISPATCH_CASE_OP(caseSensitivity, m_impl->startsWith, (prefix))
: prefix.isEmpty();
}
bool startsWith(UChar character) const {
return m_impl ? m_impl->startsWith(character) : false;
}
bool endsWith(const StringView& suffix,
TextCaseSensitivity caseSensitivity = TextCaseSensitive) const {
return m_impl
? DISPATCH_CASE_OP(caseSensitivity, m_impl->endsWith, (suffix))
: suffix.isEmpty();
}
bool endsWith(UChar character) const {
return m_impl ? m_impl->endsWith(character) : false;
}
void append(const StringView&);
void append(LChar);
void append(char c) { append(static_cast<LChar>(c)); }
void append(UChar);
void insert(const StringView&, unsigned pos);
// TODO(esprehn): replace strangely both modifies this String *and* return a
// value. It should only do one of those.
String& replace(UChar pattern, UChar replacement) {
if (m_impl)
m_impl = m_impl->replace(pattern, replacement);
return *this;
}
String& replace(UChar pattern, const StringView& replacement) {
if (m_impl)
m_impl = m_impl->replace(pattern, replacement);
return *this;
}
String& replace(const StringView& pattern, const StringView& replacement) {
if (m_impl)
m_impl = m_impl->replace(pattern, replacement);
return *this;
}
String& replace(unsigned index,
unsigned lengthToReplace,
const StringView& replacement) {
if (m_impl)
m_impl = m_impl->replace(index, lengthToReplace, replacement);
return *this;
}
void fill(UChar c) {
if (m_impl)
m_impl = m_impl->fill(c);
}
void ensure16Bit();
void truncate(unsigned length);
void remove(unsigned start, unsigned length = 1);
String substring(unsigned pos, unsigned len = UINT_MAX) const;
String left(unsigned len) const { return substring(0, len); }
String right(unsigned len) const { return substring(length() - len, len); }
// Returns a lowercase/uppercase version of the string
String lower() const;
String upper() const;
String lower(const AtomicString& localeIdentifier) const;
String upper(const AtomicString& localeIdentifier) const;
String stripWhiteSpace() const;
String stripWhiteSpace(IsWhiteSpaceFunctionPtr) const;
String simplifyWhiteSpace(StripBehavior = StripExtraWhiteSpace) const;
String simplifyWhiteSpace(IsWhiteSpaceFunctionPtr,
StripBehavior = StripExtraWhiteSpace) const;
String removeCharacters(CharacterMatchFunctionPtr) const;
template <bool isSpecialCharacter(UChar)>
bool isAllSpecialCharacters() const;
// Return the string with case folded for case insensitive comparison.
String foldCase() const;
static String format(const char*, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
// Returns an uninitialized string. The characters needs to be written
// into the buffer returned in data before the returned string is used.
// Failure to do this will have unpredictable results.
static String createUninitialized(unsigned length, UChar*& data) {
return StringImpl::createUninitialized(length, data);
}
static String createUninitialized(unsigned length, LChar*& data) {
return StringImpl::createUninitialized(length, data);
}
void split(const String& separator,
bool allowEmptyEntries,
Vector<String>& result) const;
void split(const String& separator, Vector<String>& result) const {
split(separator, false, result);
}
void split(UChar separator,
bool allowEmptyEntries,
Vector<String>& result) const;
void split(UChar separator, Vector<String>& result) const {
split(separator, false, result);
}
// Copy characters out of the string. See StringImpl.h for detailed docs.
unsigned copyTo(UChar* buffer, unsigned start, unsigned maxLength) const {
return m_impl ? m_impl->copyTo(buffer, start, maxLength) : 0;
}
template <typename BufferType>
void appendTo(BufferType&,
unsigned start = 0,
unsigned length = UINT_MAX) const;
template <typename BufferType>
void prependTo(BufferType&,
unsigned start = 0,
unsigned length = UINT_MAX) const;
// Convert the string into a number.
int toIntStrict(bool* ok = 0, int base = 10) const;
unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
int toInt(bool* ok = 0) const;
unsigned toUInt(bool* ok = 0) const;
int64_t toInt64(bool* ok = 0) const;
uint64_t toUInt64(bool* ok = 0) const;
// FIXME: Like the strict functions above, these give false for "ok" when
// there is trailing garbage. Like the non-strict functions above, these
// return the value when there is trailing garbage. It would be better if
// these were more consistent with the above functions instead.
double toDouble(bool* ok = 0) const;
float toFloat(bool* ok = 0) const;
String isolatedCopy() const;
bool isSafeToSendToAnotherThread() const;
#ifdef __OBJC__
String(NSString*);
// This conversion maps null string to "", which loses the meaning of null
// string, but we need this mapping because AppKit crashes when passed nil
// NSStrings.
operator NSString*() const {
if (!m_impl)
return @"";
return *m_impl;
}
#endif
static String make8BitFrom16BitSource(const UChar*, size_t);
template <size_t inlineCapacity>
static String make8BitFrom16BitSource(
const Vector<UChar, inlineCapacity>& buffer) {
return make8BitFrom16BitSource(buffer.data(), buffer.size());
}
static String make16BitFrom8BitSource(const LChar*, size_t);
// String::fromUTF8 will return a null string if
// the input data contains invalid UTF-8 sequences.
static String fromUTF8(const LChar*, size_t);
static String fromUTF8(const LChar*);
static String fromUTF8(const char* s, size_t length) {
return fromUTF8(reinterpret_cast<const LChar*>(s), length);
}
static String fromUTF8(const char* s) {
return fromUTF8(reinterpret_cast<const LChar*>(s));
}
static String fromUTF8(const CString&);
// Tries to convert the passed in string to UTF-8, but will fall back to
// Latin-1 if the string is not valid UTF-8.
static String fromUTF8WithLatin1Fallback(const LChar*, size_t);
static String fromUTF8WithLatin1Fallback(const char* s, size_t length) {
return fromUTF8WithLatin1Fallback(reinterpret_cast<const LChar*>(s),
length);
}
bool containsOnlyASCII() const;
bool containsOnlyLatin1() const;
bool containsOnlyWhitespace() const {
return !m_impl || m_impl->containsOnlyWhitespace();
}
size_t charactersSizeInBytes() const {
return m_impl ? m_impl->charactersSizeInBytes() : 0;
}
// Hash table deleted values, which are only constructed and never copied or
// destroyed.
String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) {}
bool isHashTableDeletedValue() const {
return m_impl.isHashTableDeletedValue();
}
#ifndef NDEBUG
void show() const;
#endif
private:
typedef struct ImplicitConversionFromWTFStringToBoolDisallowed*(
String::*UnspecifiedBoolType);
operator UnspecifiedBoolType() const;
template <typename CharacterType>
void appendInternal(CharacterType);
RefPtr<StringImpl> m_impl;
};
#undef DISPATCH_CASE_OP
inline bool operator==(const String& a, const String& b) {
return equal(a.impl(), b.impl());
}
inline bool operator==(const String& a, const LChar* b) {
return equal(a.impl(), b);
}
inline bool operator==(const String& a, const char* b) {
return equal(a.impl(), reinterpret_cast<const LChar*>(b));
}
inline bool operator==(const LChar* a, const String& b) {
return equal(a, b.impl());
}
inline bool operator==(const char* a, const String& b) {
return equal(reinterpret_cast<const LChar*>(a), b.impl());
}
template <size_t inlineCapacity>
inline bool operator==(const Vector<char, inlineCapacity>& a, const String& b) {
return equal(b.impl(), a.data(), a.size());
}
template <size_t inlineCapacity>
inline bool operator==(const String& a, const Vector<char, inlineCapacity>& b) {
return b == a;
}
inline bool operator!=(const String& a, const String& b) {
return !equal(a.impl(), b.impl());
}
inline bool operator!=(const String& a, const LChar* b) {
return !equal(a.impl(), b);
}
inline bool operator!=(const String& a, const char* b) {
return !equal(a.impl(), reinterpret_cast<const LChar*>(b));
}
inline bool operator!=(const LChar* a, const String& b) {
return !equal(a, b.impl());
}
inline bool operator!=(const char* a, const String& b) {
return !equal(reinterpret_cast<const LChar*>(a), b.impl());
}
template <size_t inlineCapacity>
inline bool operator!=(const Vector<char, inlineCapacity>& a, const String& b) {
return !(a == b);
}
template <size_t inlineCapacity>
inline bool operator!=(const String& a, const Vector<char, inlineCapacity>& b) {
return b != a;
}
inline bool equalPossiblyIgnoringCase(const String& a,
const String& b,
bool ignoreCase) {
return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
}
inline bool equalIgnoringNullity(const String& a, const String& b) {
return equalIgnoringNullity(a.impl(), b.impl());
}
template <size_t inlineCapacity>
inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a,
const String& b) {
return equalIgnoringNullity(a, b.impl());
}
inline bool operator!(const String& str) {
return str.isNull();
}
inline void swap(String& a, String& b) {
a.swap(b);
}
// Definitions of string operations
template <size_t inlineCapacity>
String::String(const Vector<UChar, inlineCapacity>& vector)
: m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size())
: StringImpl::empty()) {}
template <>
inline const LChar* String::getCharacters<LChar>() const {
ASSERT(is8Bit());
return characters8();
}
template <>
inline const UChar* String::getCharacters<UChar>() const {
ASSERT(!is8Bit());
return characters16();
}
inline bool String::containsOnlyLatin1() const {
if (isEmpty())
return true;
if (is8Bit())
return true;
const UChar* characters = characters16();
UChar ored = 0;
for (size_t i = 0; i < m_impl->length(); ++i)
ored |= characters[i];
return !(ored & 0xFF00);
}
#ifdef __OBJC__
// This is for situations in WebKit where the long standing behavior has been
// "nil if empty", so we try to maintain longstanding behavior for the sake of
// entrenched clients
inline NSString* nsStringNilIfEmpty(const String& str) {
return str.isEmpty() ? nil : (NSString*)str;
}
#endif
inline bool String::containsOnlyASCII() const {
if (isEmpty())
return true;
if (is8Bit())
return charactersAreAllASCII(characters8(), m_impl->length());
return charactersAreAllASCII(characters16(), m_impl->length());
}
WTF_EXPORT int codePointCompare(const String&, const String&);
inline bool codePointCompareLessThan(const String& a, const String& b) {
return codePointCompare(a.impl(), b.impl()) < 0;
}
WTF_EXPORT int codePointCompareIgnoringASCIICase(const String&, const char*);
template <bool isSpecialCharacter(UChar), typename CharacterType>
inline bool isAllSpecialCharacters(const CharacterType* characters,
size_t length) {
for (size_t i = 0; i < length; ++i) {
if (!isSpecialCharacter(characters[i]))
return false;
}
return true;
}
template <bool isSpecialCharacter(UChar)>
inline bool String::isAllSpecialCharacters() const {
size_t len = length();
if (!len)
return true;
if (is8Bit())
return WTF::isAllSpecialCharacters<isSpecialCharacter, LChar>(characters8(),
len);
return WTF::isAllSpecialCharacters<isSpecialCharacter, UChar>(characters16(),
len);
}
template <typename BufferType>
void String::appendTo(BufferType& result,
unsigned position,
unsigned length) const {
if (!m_impl)
return;
m_impl->appendTo(result, position, length);
}
template <typename BufferType>
void String::prependTo(BufferType& result,
unsigned position,
unsigned length) const {
if (!m_impl)
return;
m_impl->prependTo(result, position, length);
}
// StringHash is the default hash for String
template <typename T>
struct DefaultHash;
template <>
struct DefaultHash<String> {
typedef StringHash Hash;
};
// Shared global empty string.
WTF_EXPORT const String& emptyString();
WTF_EXPORT const String& emptyString16Bit();
WTF_EXPORT extern const String& xmlnsWithColon;
// Pretty printer for gtest and base/logging.*. It prepends and appends
// double-quotes, and escapes chracters other than ASCII printables.
WTF_EXPORT std::ostream& operator<<(std::ostream&, const String&);
inline StringView::StringView(const String& string,
unsigned offset,
unsigned length)
: StringView(string.impl(), offset, length) {}
inline StringView::StringView(const String& string, unsigned offset)
: StringView(string.impl(), offset) {}
inline StringView::StringView(const String& string)
: StringView(string.impl()) {}
} // namespace WTF
WTF_ALLOW_MOVE_AND_INIT_WITH_MEM_FUNCTIONS(String);
using WTF::CString;
using WTF::StrictUTF8Conversion;
using WTF::StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD;
using WTF::String;
using WTF::emptyString;
using WTF::emptyString16Bit;
using WTF::charactersAreAllASCII;
using WTF::equal;
using WTF::find;
using WTF::isAllSpecialCharacters;
using WTF::isSpaceOrNewline;
#include "wtf/text/AtomicString.h"
#endif // WTFString_h