| /* |
| * Copyright (C) 2011 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Google Inc. nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "platform/text/LocaleToScriptMapping.h" |
| |
| #include "wtf/HashMap.h" |
| #include "wtf/HashSet.h" |
| #include "wtf/text/StringHash.h" |
| |
| namespace blink { |
| |
| struct SubtagScript { |
| const char* subtag; |
| UScriptCode script; |
| }; |
| |
| using SubtagScriptMap = HashMap<String, UScriptCode, CaseFoldingHash>; |
| |
| static void createSubtagScriptMap(SubtagScriptMap& map, |
| const SubtagScript list[], |
| size_t size) { |
| map.reserveCapacityForSize(size); |
| for (size_t i = 0; i < size; ++i) |
| map.set(list[i].subtag, list[i].script); |
| } |
| |
| UScriptCode scriptNameToCode(const String& scriptName) { |
| // This generally maps an ISO 15924 script code to its UScriptCode, but |
| // certain families of script codes are treated as a single script for |
| // assigning a per-script font in Settings. For example, "hira" is mapped to |
| // USCRIPT_KATAKANA_OR_HIRAGANA instead of USCRIPT_HIRAGANA, since we want all |
| // Japanese scripts to be rendered using the same font setting. |
| static const SubtagScript scriptNameCodeList[] = { |
| {"zyyy", USCRIPT_COMMON}, |
| {"qaai", USCRIPT_INHERITED}, |
| {"arab", USCRIPT_ARABIC}, |
| {"armn", USCRIPT_ARMENIAN}, |
| {"beng", USCRIPT_BENGALI}, |
| {"bopo", USCRIPT_BOPOMOFO}, |
| {"cher", USCRIPT_CHEROKEE}, |
| {"copt", USCRIPT_COPTIC}, |
| {"cyrl", USCRIPT_CYRILLIC}, |
| {"dsrt", USCRIPT_DESERET}, |
| {"deva", USCRIPT_DEVANAGARI}, |
| {"ethi", USCRIPT_ETHIOPIC}, |
| {"geor", USCRIPT_GEORGIAN}, |
| {"goth", USCRIPT_GOTHIC}, |
| {"grek", USCRIPT_GREEK}, |
| {"gujr", USCRIPT_GUJARATI}, |
| {"guru", USCRIPT_GURMUKHI}, |
| {"hani", USCRIPT_HAN}, |
| {"hang", USCRIPT_HANGUL}, |
| {"hebr", USCRIPT_HEBREW}, |
| {"hira", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"knda", USCRIPT_KANNADA}, |
| {"kana", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"khmr", USCRIPT_KHMER}, |
| {"laoo", USCRIPT_LAO}, |
| {"latn", USCRIPT_LATIN}, |
| {"mlym", USCRIPT_MALAYALAM}, |
| {"mong", USCRIPT_MONGOLIAN}, |
| {"mymr", USCRIPT_MYANMAR}, |
| {"ogam", USCRIPT_OGHAM}, |
| {"ital", USCRIPT_OLD_ITALIC}, |
| {"orya", USCRIPT_ORIYA}, |
| {"runr", USCRIPT_RUNIC}, |
| {"sinh", USCRIPT_SINHALA}, |
| {"syrc", USCRIPT_SYRIAC}, |
| {"taml", USCRIPT_TAMIL}, |
| {"telu", USCRIPT_TELUGU}, |
| {"thaa", USCRIPT_THAANA}, |
| {"thai", USCRIPT_THAI}, |
| {"tibt", USCRIPT_TIBETAN}, |
| {"cans", USCRIPT_CANADIAN_ABORIGINAL}, |
| {"yiii", USCRIPT_YI}, |
| {"tglg", USCRIPT_TAGALOG}, |
| {"hano", USCRIPT_HANUNOO}, |
| {"buhd", USCRIPT_BUHID}, |
| {"tagb", USCRIPT_TAGBANWA}, |
| {"brai", USCRIPT_BRAILLE}, |
| {"cprt", USCRIPT_CYPRIOT}, |
| {"limb", USCRIPT_LIMBU}, |
| {"linb", USCRIPT_LINEAR_B}, |
| {"osma", USCRIPT_OSMANYA}, |
| {"shaw", USCRIPT_SHAVIAN}, |
| {"tale", USCRIPT_TAI_LE}, |
| {"ugar", USCRIPT_UGARITIC}, |
| {"hrkt", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"bugi", USCRIPT_BUGINESE}, |
| {"glag", USCRIPT_GLAGOLITIC}, |
| {"khar", USCRIPT_KHAROSHTHI}, |
| {"sylo", USCRIPT_SYLOTI_NAGRI}, |
| {"talu", USCRIPT_NEW_TAI_LUE}, |
| {"tfng", USCRIPT_TIFINAGH}, |
| {"xpeo", USCRIPT_OLD_PERSIAN}, |
| {"bali", USCRIPT_BALINESE}, |
| {"batk", USCRIPT_BATAK}, |
| {"blis", USCRIPT_BLISSYMBOLS}, |
| {"brah", USCRIPT_BRAHMI}, |
| {"cham", USCRIPT_CHAM}, |
| {"cirt", USCRIPT_CIRTH}, |
| {"cyrs", USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC}, |
| {"egyd", USCRIPT_DEMOTIC_EGYPTIAN}, |
| {"egyh", USCRIPT_HIERATIC_EGYPTIAN}, |
| {"egyp", USCRIPT_EGYPTIAN_HIEROGLYPHS}, |
| {"geok", USCRIPT_KHUTSURI}, |
| {"hans", USCRIPT_SIMPLIFIED_HAN}, |
| {"hant", USCRIPT_TRADITIONAL_HAN}, |
| {"hmng", USCRIPT_PAHAWH_HMONG}, |
| {"hung", USCRIPT_OLD_HUNGARIAN}, |
| {"inds", USCRIPT_HARAPPAN_INDUS}, |
| {"java", USCRIPT_JAVANESE}, |
| {"kali", USCRIPT_KAYAH_LI}, |
| {"latf", USCRIPT_LATIN_FRAKTUR}, |
| {"latg", USCRIPT_LATIN_GAELIC}, |
| {"lepc", USCRIPT_LEPCHA}, |
| {"lina", USCRIPT_LINEAR_A}, |
| {"mand", USCRIPT_MANDAEAN}, |
| {"maya", USCRIPT_MAYAN_HIEROGLYPHS}, |
| {"mero", USCRIPT_MEROITIC}, |
| {"nkoo", USCRIPT_NKO}, |
| {"orkh", USCRIPT_ORKHON}, |
| {"perm", USCRIPT_OLD_PERMIC}, |
| {"phag", USCRIPT_PHAGS_PA}, |
| {"phnx", USCRIPT_PHOENICIAN}, |
| {"plrd", USCRIPT_PHONETIC_POLLARD}, |
| {"roro", USCRIPT_RONGORONGO}, |
| {"sara", USCRIPT_SARATI}, |
| {"syre", USCRIPT_ESTRANGELO_SYRIAC}, |
| {"syrj", USCRIPT_WESTERN_SYRIAC}, |
| {"syrn", USCRIPT_EASTERN_SYRIAC}, |
| {"teng", USCRIPT_TENGWAR}, |
| {"vaii", USCRIPT_VAI}, |
| {"visp", USCRIPT_VISIBLE_SPEECH}, |
| {"xsux", USCRIPT_CUNEIFORM}, |
| {"jpan", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"kore", USCRIPT_HANGUL}, |
| {"zxxx", USCRIPT_UNWRITTEN_LANGUAGES}, |
| {"zzzz", USCRIPT_UNKNOWN}}; |
| DEFINE_STATIC_LOCAL(SubtagScriptMap, scriptNameCodeMap, ()); |
| if (scriptNameCodeMap.isEmpty()) |
| createSubtagScriptMap(scriptNameCodeMap, scriptNameCodeList, |
| WTF_ARRAY_LENGTH(scriptNameCodeList)); |
| |
| const auto& it = scriptNameCodeMap.find(scriptName); |
| if (it != scriptNameCodeMap.end()) |
| return it->value; |
| return USCRIPT_INVALID_CODE; |
| } |
| |
| UScriptCode localeToScriptCodeForFontSelection(const String& locale) { |
| static const SubtagScript localeScriptList[] = { |
| {"aa", USCRIPT_LATIN}, |
| {"ab", USCRIPT_CYRILLIC}, |
| {"ady", USCRIPT_CYRILLIC}, |
| {"aeb", USCRIPT_ARABIC}, |
| {"af", USCRIPT_LATIN}, |
| {"ak", USCRIPT_LATIN}, |
| {"am", USCRIPT_ETHIOPIC}, |
| {"ar", USCRIPT_ARABIC}, |
| {"arq", USCRIPT_ARABIC}, |
| {"ary", USCRIPT_ARABIC}, |
| {"arz", USCRIPT_ARABIC}, |
| {"as", USCRIPT_BENGALI}, |
| {"ast", USCRIPT_LATIN}, |
| {"av", USCRIPT_CYRILLIC}, |
| {"ay", USCRIPT_LATIN}, |
| {"az", USCRIPT_LATIN}, |
| {"azb", USCRIPT_ARABIC}, |
| {"ba", USCRIPT_CYRILLIC}, |
| {"bal", USCRIPT_ARABIC}, |
| {"be", USCRIPT_CYRILLIC}, |
| {"bej", USCRIPT_ARABIC}, |
| {"bg", USCRIPT_CYRILLIC}, |
| {"bi", USCRIPT_LATIN}, |
| {"bn", USCRIPT_BENGALI}, |
| {"bo", USCRIPT_TIBETAN}, |
| {"bqi", USCRIPT_ARABIC}, |
| {"brh", USCRIPT_ARABIC}, |
| {"bs", USCRIPT_LATIN}, |
| {"ca", USCRIPT_LATIN}, |
| {"ce", USCRIPT_CYRILLIC}, |
| {"ceb", USCRIPT_LATIN}, |
| {"ch", USCRIPT_LATIN}, |
| {"chk", USCRIPT_LATIN}, |
| {"cja", USCRIPT_ARABIC}, |
| {"cjm", USCRIPT_ARABIC}, |
| {"ckb", USCRIPT_ARABIC}, |
| {"cs", USCRIPT_LATIN}, |
| {"cy", USCRIPT_LATIN}, |
| {"da", USCRIPT_LATIN}, |
| {"dcc", USCRIPT_ARABIC}, |
| {"de", USCRIPT_LATIN}, |
| {"doi", USCRIPT_ARABIC}, |
| {"dv", USCRIPT_THAANA}, |
| {"dyo", USCRIPT_ARABIC}, |
| {"dz", USCRIPT_TIBETAN}, |
| {"ee", USCRIPT_LATIN}, |
| {"efi", USCRIPT_LATIN}, |
| {"el", USCRIPT_GREEK}, |
| {"en", USCRIPT_LATIN}, |
| {"es", USCRIPT_LATIN}, |
| {"et", USCRIPT_LATIN}, |
| {"eu", USCRIPT_LATIN}, |
| {"fa", USCRIPT_ARABIC}, |
| {"fi", USCRIPT_LATIN}, |
| {"fil", USCRIPT_LATIN}, |
| {"fj", USCRIPT_LATIN}, |
| {"fo", USCRIPT_LATIN}, |
| {"fr", USCRIPT_LATIN}, |
| {"fur", USCRIPT_LATIN}, |
| {"fy", USCRIPT_LATIN}, |
| {"ga", USCRIPT_LATIN}, |
| {"gaa", USCRIPT_LATIN}, |
| {"gba", USCRIPT_ARABIC}, |
| {"gbz", USCRIPT_ARABIC}, |
| {"gd", USCRIPT_LATIN}, |
| {"gil", USCRIPT_LATIN}, |
| {"gl", USCRIPT_LATIN}, |
| {"gjk", USCRIPT_ARABIC}, |
| {"gju", USCRIPT_ARABIC}, |
| {"glk", USCRIPT_ARABIC}, |
| {"gn", USCRIPT_LATIN}, |
| {"gsw", USCRIPT_LATIN}, |
| {"gu", USCRIPT_GUJARATI}, |
| {"ha", USCRIPT_LATIN}, |
| {"haw", USCRIPT_LATIN}, |
| {"haz", USCRIPT_ARABIC}, |
| {"he", USCRIPT_HEBREW}, |
| {"hi", USCRIPT_DEVANAGARI}, |
| {"hil", USCRIPT_LATIN}, |
| {"hnd", USCRIPT_ARABIC}, |
| {"hno", USCRIPT_ARABIC}, |
| {"ho", USCRIPT_LATIN}, |
| {"hr", USCRIPT_LATIN}, |
| {"ht", USCRIPT_LATIN}, |
| {"hu", USCRIPT_LATIN}, |
| {"hy", USCRIPT_ARMENIAN}, |
| {"id", USCRIPT_LATIN}, |
| {"ig", USCRIPT_LATIN}, |
| {"ii", USCRIPT_YI}, |
| {"ilo", USCRIPT_LATIN}, |
| {"inh", USCRIPT_CYRILLIC}, |
| {"is", USCRIPT_LATIN}, |
| {"it", USCRIPT_LATIN}, |
| {"iu", USCRIPT_CANADIAN_ABORIGINAL}, |
| {"ja", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"jv", USCRIPT_LATIN}, |
| {"ka", USCRIPT_GEORGIAN}, |
| {"kaj", USCRIPT_LATIN}, |
| {"kam", USCRIPT_LATIN}, |
| {"kbd", USCRIPT_CYRILLIC}, |
| {"kha", USCRIPT_LATIN}, |
| {"khw", USCRIPT_ARABIC}, |
| {"kk", USCRIPT_CYRILLIC}, |
| {"kl", USCRIPT_LATIN}, |
| {"km", USCRIPT_KHMER}, |
| {"kn", USCRIPT_KANNADA}, |
| {"ko", USCRIPT_HANGUL}, |
| {"kok", USCRIPT_DEVANAGARI}, |
| {"kos", USCRIPT_LATIN}, |
| {"kpe", USCRIPT_LATIN}, |
| {"krc", USCRIPT_CYRILLIC}, |
| {"ks", USCRIPT_ARABIC}, |
| {"ku", USCRIPT_ARABIC}, |
| {"kum", USCRIPT_CYRILLIC}, |
| {"kvx", USCRIPT_ARABIC}, |
| {"kxp", USCRIPT_ARABIC}, |
| {"ky", USCRIPT_CYRILLIC}, |
| {"la", USCRIPT_LATIN}, |
| {"lah", USCRIPT_ARABIC}, |
| {"lb", USCRIPT_LATIN}, |
| {"lez", USCRIPT_CYRILLIC}, |
| {"lki", USCRIPT_ARABIC}, |
| {"ln", USCRIPT_LATIN}, |
| {"lo", USCRIPT_LAO}, |
| {"lrc", USCRIPT_ARABIC}, |
| {"lt", USCRIPT_LATIN}, |
| {"luz", USCRIPT_ARABIC}, |
| {"lv", USCRIPT_LATIN}, |
| {"mai", USCRIPT_DEVANAGARI}, |
| {"mdf", USCRIPT_CYRILLIC}, |
| {"mfa", USCRIPT_ARABIC}, |
| {"mg", USCRIPT_LATIN}, |
| {"mh", USCRIPT_LATIN}, |
| {"mi", USCRIPT_LATIN}, |
| {"mk", USCRIPT_CYRILLIC}, |
| {"ml", USCRIPT_MALAYALAM}, |
| {"mn", USCRIPT_CYRILLIC}, |
| {"mr", USCRIPT_DEVANAGARI}, |
| {"ms", USCRIPT_LATIN}, |
| {"mt", USCRIPT_LATIN}, |
| {"mvy", USCRIPT_ARABIC}, |
| {"my", USCRIPT_MYANMAR}, |
| {"myv", USCRIPT_CYRILLIC}, |
| {"mzn", USCRIPT_ARABIC}, |
| {"na", USCRIPT_LATIN}, |
| {"nb", USCRIPT_LATIN}, |
| {"ne", USCRIPT_DEVANAGARI}, |
| {"niu", USCRIPT_LATIN}, |
| {"nl", USCRIPT_LATIN}, |
| {"nn", USCRIPT_LATIN}, |
| {"nr", USCRIPT_LATIN}, |
| {"nso", USCRIPT_LATIN}, |
| {"ny", USCRIPT_LATIN}, |
| {"oc", USCRIPT_LATIN}, |
| {"om", USCRIPT_LATIN}, |
| {"or", USCRIPT_ORIYA}, |
| {"os", USCRIPT_CYRILLIC}, |
| {"pa", USCRIPT_GURMUKHI}, |
| {"pag", USCRIPT_LATIN}, |
| {"pap", USCRIPT_LATIN}, |
| {"pau", USCRIPT_LATIN}, |
| {"pl", USCRIPT_LATIN}, |
| {"pon", USCRIPT_LATIN}, |
| {"prd", USCRIPT_ARABIC}, |
| {"prs", USCRIPT_ARABIC}, |
| {"ps", USCRIPT_ARABIC}, |
| {"pt", USCRIPT_LATIN}, |
| {"qu", USCRIPT_LATIN}, |
| {"rm", USCRIPT_LATIN}, |
| {"rmt", USCRIPT_ARABIC}, |
| {"rn", USCRIPT_LATIN}, |
| {"ro", USCRIPT_LATIN}, |
| {"ru", USCRIPT_CYRILLIC}, |
| {"rw", USCRIPT_LATIN}, |
| {"sa", USCRIPT_DEVANAGARI}, |
| {"sah", USCRIPT_CYRILLIC}, |
| {"sat", USCRIPT_LATIN}, |
| {"sd", USCRIPT_ARABIC}, |
| {"sdh", USCRIPT_ARABIC}, |
| {"se", USCRIPT_LATIN}, |
| {"sg", USCRIPT_LATIN}, |
| {"shi", USCRIPT_ARABIC}, |
| {"si", USCRIPT_SINHALA}, |
| {"sid", USCRIPT_LATIN}, |
| {"sk", USCRIPT_LATIN}, |
| {"skr", USCRIPT_ARABIC}, |
| {"sl", USCRIPT_LATIN}, |
| {"sm", USCRIPT_LATIN}, |
| {"so", USCRIPT_LATIN}, |
| {"sq", USCRIPT_LATIN}, |
| {"sr", USCRIPT_CYRILLIC}, |
| {"ss", USCRIPT_LATIN}, |
| {"st", USCRIPT_LATIN}, |
| {"su", USCRIPT_LATIN}, |
| {"sus", USCRIPT_ARABIC}, |
| {"sv", USCRIPT_LATIN}, |
| {"sw", USCRIPT_LATIN}, |
| {"swb", USCRIPT_ARABIC}, |
| {"syr", USCRIPT_ARABIC}, |
| {"ta", USCRIPT_TAMIL}, |
| {"te", USCRIPT_TELUGU}, |
| {"tet", USCRIPT_LATIN}, |
| {"tg", USCRIPT_CYRILLIC}, |
| {"th", USCRIPT_THAI}, |
| {"ti", USCRIPT_ETHIOPIC}, |
| {"tig", USCRIPT_ETHIOPIC}, |
| {"tk", USCRIPT_LATIN}, |
| {"tkl", USCRIPT_LATIN}, |
| {"tl", USCRIPT_LATIN}, |
| {"tn", USCRIPT_LATIN}, |
| {"to", USCRIPT_LATIN}, |
| {"tpi", USCRIPT_LATIN}, |
| {"tr", USCRIPT_LATIN}, |
| {"trv", USCRIPT_LATIN}, |
| {"ts", USCRIPT_LATIN}, |
| {"tt", USCRIPT_CYRILLIC}, |
| {"ttt", USCRIPT_ARABIC}, |
| {"tvl", USCRIPT_LATIN}, |
| {"tw", USCRIPT_LATIN}, |
| {"ty", USCRIPT_LATIN}, |
| {"tyv", USCRIPT_CYRILLIC}, |
| {"udm", USCRIPT_CYRILLIC}, |
| {"ug", USCRIPT_ARABIC}, |
| {"uk", USCRIPT_CYRILLIC}, |
| {"und", USCRIPT_LATIN}, |
| {"ur", USCRIPT_ARABIC}, |
| {"uz", USCRIPT_CYRILLIC}, |
| {"ve", USCRIPT_LATIN}, |
| {"vi", USCRIPT_LATIN}, |
| {"wal", USCRIPT_ETHIOPIC}, |
| {"war", USCRIPT_LATIN}, |
| {"wo", USCRIPT_LATIN}, |
| {"xh", USCRIPT_LATIN}, |
| {"yap", USCRIPT_LATIN}, |
| {"yo", USCRIPT_LATIN}, |
| {"za", USCRIPT_LATIN}, |
| {"zdj", USCRIPT_ARABIC}, |
| {"zh", USCRIPT_SIMPLIFIED_HAN}, |
| {"zu", USCRIPT_LATIN}, |
| // Encompassed languages within the Chinese macrolanguage. |
| // http://www-01.sil.org/iso639-3/documentation.asp?id=zho |
| // http://lists.w3.org/Archives/Public/public-i18n-cjk/2016JulSep/0022.html |
| {"cdo", USCRIPT_SIMPLIFIED_HAN}, |
| {"cjy", USCRIPT_SIMPLIFIED_HAN}, |
| {"cmn", USCRIPT_SIMPLIFIED_HAN}, |
| {"cpx", USCRIPT_SIMPLIFIED_HAN}, |
| {"czh", USCRIPT_SIMPLIFIED_HAN}, |
| {"czo", USCRIPT_SIMPLIFIED_HAN}, |
| {"gan", USCRIPT_SIMPLIFIED_HAN}, |
| {"hsn", USCRIPT_SIMPLIFIED_HAN}, |
| {"mnp", USCRIPT_SIMPLIFIED_HAN}, |
| {"wuu", USCRIPT_SIMPLIFIED_HAN}, |
| {"hak", USCRIPT_TRADITIONAL_HAN}, |
| {"lzh", USCRIPT_TRADITIONAL_HAN}, |
| {"nan", USCRIPT_TRADITIONAL_HAN}, |
| {"yue", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-cdo", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-cjy", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-cmn", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-cpx", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-czh", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-czo", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-gan", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-hsn", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-mnp", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-wuu", USCRIPT_SIMPLIFIED_HAN}, |
| {"zh-hak", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-lzh", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-nan", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-yue", USCRIPT_TRADITIONAL_HAN}, |
| // Chinese with regions. Logically, regions should be handled |
| // separately, but this works for the current purposes. |
| {"zh-hk", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-mo", USCRIPT_TRADITIONAL_HAN}, |
| {"zh-tw", USCRIPT_TRADITIONAL_HAN}, |
| }; |
| DEFINE_STATIC_LOCAL(SubtagScriptMap, localeScriptMap, ()); |
| if (localeScriptMap.isEmpty()) |
| createSubtagScriptMap(localeScriptMap, localeScriptList, |
| WTF_ARRAY_LENGTH(localeScriptList)); |
| |
| // BCP 47 uses '-' as the delimiter but ICU uses '_'. |
| // https://tools.ietf.org/html/bcp47 |
| String canonicalLocale = locale; |
| canonicalLocale.replace('_', '-'); |
| while (!canonicalLocale.isEmpty()) { |
| const auto& it = localeScriptMap.find(canonicalLocale); |
| if (it != localeScriptMap.end()) |
| return it->value; |
| size_t pos = canonicalLocale.reverseFind('-'); |
| if (pos == kNotFound) |
| break; |
| // script = 4ALPHA |
| if (canonicalLocale.length() - (pos + 1) == 4) { |
| UScriptCode code = scriptNameToCode(canonicalLocale.substring(pos + 1)); |
| if (code != USCRIPT_INVALID_CODE && code != USCRIPT_UNKNOWN) |
| return code; |
| } |
| canonicalLocale = canonicalLocale.substring(0, pos); |
| } |
| return USCRIPT_COMMON; |
| } |
| |
| static UScriptCode scriptCodeForHanFromRegion(const String& region) { |
| static const SubtagScript regionScriptList[] = { |
| {"hk", USCRIPT_TRADITIONAL_HAN}, {"jp", USCRIPT_KATAKANA_OR_HIRAGANA}, |
| {"kr", USCRIPT_HANGUL}, {"mo", USCRIPT_TRADITIONAL_HAN}, |
| {"tw", USCRIPT_TRADITIONAL_HAN}, |
| }; |
| DEFINE_STATIC_LOCAL(SubtagScriptMap, regionScriptMap, ()); |
| if (regionScriptMap.isEmpty()) |
| createSubtagScriptMap(regionScriptMap, regionScriptList, |
| WTF_ARRAY_LENGTH(regionScriptList)); |
| |
| const auto& it = regionScriptMap.find(region); |
| return it != regionScriptMap.end() ? it->value : USCRIPT_COMMON; |
| } |
| |
| UScriptCode scriptCodeForHanFromSubtags(const String& locale, char delimiter) { |
| // Some sites emit lang="en-JP" when English is set as the preferred |
| // language. Use script/region subtags of the content locale to pick the |
| // fallback font for unified Han ideographs. |
| for (size_t end = locale.find(delimiter); end != kNotFound;) { |
| size_t begin = end + 1; |
| end = locale.find(delimiter, begin); |
| size_t len = (end == kNotFound ? locale.length() : end) - begin; |
| UScriptCode script; |
| switch (len) { |
| case 2: // region = 2ALPHA / 3DIGIT |
| script = scriptCodeForHanFromRegion(locale.substring(begin, len)); |
| if (script != USCRIPT_COMMON) |
| return script; |
| break; |
| case 4: // script = 4ALPHA |
| script = scriptNameToCode(locale.substring(begin, len)); |
| if (isUnambiguousHanScript(script)) |
| return script; |
| } |
| } |
| |
| return USCRIPT_COMMON; |
| } |
| |
| } // namespace blink |