blob: f0ccecceeacb1396b999b865e7487fca8968c647 [file] [log] [blame]
/*
* Copyright (C) 2014 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_CHARACTER_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_TEXT_CHARACTER_H_
#include "third_party/blink/renderer/platform/platform_export.h"
#include "third_party/blink/renderer/platform/text/character_property.h"
#include "third_party/blink/renderer/platform/text/text_direction.h"
#include "third_party/blink/renderer/platform/text/text_run.h"
#include "third_party/blink/renderer/platform/wtf/allocator.h"
#include "third_party/blink/renderer/platform/wtf/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/hash_set.h"
#include "third_party/blink/renderer/platform/wtf/text/character_names.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
namespace blink {
class PLATFORM_EXPORT Character {
STATIC_ONLY(Character);
public:
static inline bool IsInRange(UChar32 character,
UChar32 lower_bound,
UChar32 upper_bound) {
return character >= lower_bound && character <= upper_bound;
}
static inline bool IsUnicodeVariationSelector(UChar32 character) {
// http://www.unicode.org/Public/UCD/latest/ucd/StandardizedVariants.html
return IsInRange(character, 0x180B,
0x180D) // MONGOLIAN FREE VARIATION SELECTOR ONE to THREE
||
IsInRange(character, 0xFE00, 0xFE0F) // VARIATION SELECTOR-1 to 16
|| IsInRange(character, 0xE0100,
0xE01EF); // VARIATION SELECTOR-17 to 256
}
static bool IsCJKIdeographOrSymbol(UChar32 c) {
// Below U+02C7 is likely a common case.
return c < 0x2C7 ? false : IsCJKIdeographOrSymbolSlow(c);
}
static bool IsCJKIdeographOrSymbolBase(UChar32 c) {
return IsCJKIdeographOrSymbol(c) &&
!(U_GET_GC_MASK(c) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK));
}
static bool IsHangul(UChar32 c) {
// Below U+1100 is likely a common case.
return c < 0x1100 ? false : IsHangulSlow(c);
}
static unsigned ExpansionOpportunityCount(const LChar*,
unsigned length,
TextDirection,
bool& is_after_expansion,
const TextJustify);
static unsigned ExpansionOpportunityCount(const UChar*,
unsigned length,
TextDirection,
bool& is_after_expansion,
const TextJustify);
static unsigned ExpansionOpportunityCount(const TextRun& run,
bool& is_after_expansion) {
if (run.Is8Bit())
return ExpansionOpportunityCount(run.Characters8(), run.length(),
run.Direction(), is_after_expansion,
run.GetTextJustify());
return ExpansionOpportunityCount(run.Characters16(), run.length(),
run.Direction(), is_after_expansion,
run.GetTextJustify());
}
static bool IsUprightInMixedVertical(UChar32 character);
// https://html.spec.whatwg.org/multipage/scripting.html#prod-potentialcustomelementname
static bool IsPotentialCustomElementName8BitChar(LChar ch) {
return IsASCIILower(ch) || IsASCIIDigit(ch) || ch == '-' || ch == '.' ||
ch == '_' || ch == 0xb7 || (0xc0 <= ch && ch != 0xd7 && ch != 0xf7);
}
static bool IsPotentialCustomElementNameChar(UChar32 character);
// http://unicode.org/reports/tr9/#Directional_Formatting_Characters
static bool IsBidiControl(UChar32 character);
static bool TreatAsSpace(UChar32 c) {
return c == kSpaceCharacter || c == kTabulationCharacter ||
c == kNewlineCharacter || c == kNoBreakSpaceCharacter;
}
static bool TreatAsZeroWidthSpace(UChar32 c) {
return TreatAsZeroWidthSpaceInComplexScript(c) ||
c == kZeroWidthNonJoinerCharacter || c == kZeroWidthJoinerCharacter;
}
static bool LegacyTreatAsZeroWidthSpaceInComplexScript(UChar32 c) {
return c < 0x20 // ASCII Control Characters
||
(c >= 0x7F && c < 0xA0) // ASCII Delete .. No-break spaceCharacter
|| TreatAsZeroWidthSpaceInComplexScript(c);
}
static bool TreatAsZeroWidthSpaceInComplexScript(UChar32 c) {
return c == kFormFeedCharacter || c == kCarriageReturnCharacter ||
c == kSoftHyphenCharacter || c == kZeroWidthSpaceCharacter ||
(c >= kLeftToRightMarkCharacter && c <= kRightToLeftMarkCharacter) ||
(c >= kLeftToRightEmbedCharacter &&
c <= kRightToLeftOverrideCharacter) ||
c == kZeroWidthNoBreakSpaceCharacter ||
c == kObjectReplacementCharacter;
}
static bool CanTextDecorationSkipInk(UChar32);
static bool CanReceiveTextEmphasis(UChar32);
static bool IsGraphemeExtended(UChar32 c) {
// http://unicode.org/reports/tr29/#Extend
return u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND);
}
// Returns true if the character has a Emoji property.
// See http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
static bool IsEmoji(UChar32);
// Default presentation style according to:
// http://www.unicode.org/reports/tr51/#Presentation_Style
static bool IsEmojiTextDefault(UChar32);
static bool IsEmojiEmojiDefault(UChar32);
static bool IsEmojiModifierBase(UChar32);
static bool IsEmojiKeycapBase(UChar32);
static bool IsRegionalIndicator(UChar32);
static bool IsModifier(UChar32 c) { return c >= 0x1F3FB && c <= 0x1F3FF; }
// http://www.unicode.org/reports/tr51/proposed.html#flag-emoji-tag-sequences
static bool IsEmojiFlagSequenceTag(UChar32);
static inline UChar NormalizeSpaces(UChar character) {
if (TreatAsSpace(character))
return kSpaceCharacter;
if (TreatAsZeroWidthSpace(character))
return kZeroWidthSpaceCharacter;
return character;
}
static inline bool IsNormalizedCanvasSpaceCharacter(UChar32 c) {
// According to specification all space characters should be replaced with
// 0x0020 space character.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-canvas-element.html#text-preparation-algorithm
// The space characters according to specification are : U+0020, U+0009,
// U+000A, U+000C, and U+000D.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
// This function returns true for 0x000B also, so that this is backward
// compatible. Otherwise, the test
// web_tests/canvas/philip/tests/2d.text.draw.space.collapse.space.html
// will fail
return c == 0x0009 || (c >= 0x000A && c <= 0x000D);
}
static String NormalizeSpaces(const LChar*, unsigned length);
static String NormalizeSpaces(const UChar*, unsigned length);
static bool IsCommonOrInheritedScript(UChar32);
static bool IsPrivateUse(UChar32);
static bool IsNonCharacter(UChar32);
// Returns whether a script code could be determined for the given character
// and that script code is not USCRIPT_COMMON or USCRIPT_INHERITED.
static bool HasDefiniteScript(UChar32);
static bool IsModernGeorgianUppercase(UChar32 c) {
return IsInRange(c, 0x1C90, 0x1CBF);
}
// Map modern secular Georgian uppercase letters added in Unicode
// 11.0 to their corresponding lowercase letters.
// https://www.unicode.org/charts/PDF/U10A0.pdf
// https://www.unicode.org/charts/PDF/U1C90.pdf
static UChar32 LowercaseModernGeorgianUppercase(UChar32 c) {
return (c - (0x1C90 - 0x10D0));
}
private:
static bool IsCJKIdeographOrSymbolSlow(UChar32);
static bool IsHangulSlow(UChar32);
};
} // namespace blink
#endif