/*
 * Copyright (C) 2007 Apple Computer, Inc.
 *
 * Portions are Copyright (C) 1998 Netscape Communications Corporation.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 *
 * Alternatively, the contents of this file may be used under the terms
 * of either the Mozilla Public License Version 1.1, found at
 * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
 * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
 * (the "GPL"), in which case the provisions of the MPL or the GPL are
 * applicable instead of those above.  If you wish to allow use of your
 * version of this file only under the terms of one of those two
 * licenses (the MPL or the GPL) and not to allow others to use your
 * version of this file under the LGPL, indicate your decision by
 * deletingthe provisions above and replace them with the notice and
 * other provisions required by the MPL or the GPL, as the case may be.
 * If you do not delete the provisions above, a recipient may use your
 * version of this file under any of the LGPL, the MPL or the GPL.
 */

#include "platform/text/UnicodeRange.h"

namespace blink {

/**********************************************************************
 * Unicode subranges as defined in unicode 3.0
 * x-western, x-central-euro, tr, x-baltic  -> latin
 *  0000 - 036f
 *  1e00 - 1eff
 *  2000 - 206f  (general punctuation)
 *  20a0 - 20cf  (currency symbols)
 *  2100 - 214f  (letterlike symbols)
 *  2150 - 218f  (Number Forms)
 * el         -> greek
 *  0370 - 03ff
 *  1f00 - 1fff
 * x-cyrillic -> cyrillic
 *  0400 - 04ff
 * he         -> hebrew
 *  0590 - 05ff
 * ar         -> arabic
 *  0600 - 06ff
 *  fb50 - fdff (arabic presentation forms)
 *  fe70 - feff (arabic presentation forms b)
 * th         -> thai
 *  0e00 - 0e7f
 * ko        -> korean
 *  ac00 - d7af  (hangul Syllables)
 *  1100 - 11ff    (jamo)
 *  3130 - 318f (hangul compatibility jamo)
 * ja
 *  3040 - 309f (hiragana)
 *  30a0 - 30ff (katakana)
 * zh-CN
 * zh-TW
 *
 * CJK
 *  3100 - 312f (bopomofo)
 *  31a0 - 31bf (bopomofo extended)
 *  3000 - 303f (CJK Symbols and Punctuation)
 *  2e80 - 2eff (CJK radicals supplement)
 *  2f00 - 2fdf (Kangxi Radicals)
 *  2ff0 - 2fff (Ideographic Description Characters)
 *  3190 - 319f (kanbun)
 *  3200 - 32ff (Enclosed CJK letters and Months)
 *  3300 - 33ff (CJK compatibility)
 *  3400 - 4dbf (CJK Unified Ideographs Extension A)
 *  4e00 - 9faf (CJK Unified Ideographs)
 *  f900 - fa5f (CJK Compatibility Ideographs)
 *  fe30 - fe4f (CJK compatibility Forms)
 *  ff00 - ffef (halfwidth and fullwidth forms)
 *
 * Armenian
 *  0530 - 058f
 * Syriac
 *  0700 - 074f
 * Thaana
 *  0780 - 07bf
 * Devanagari
 *  0900 - 097f
 * Bengali
 *  0980 - 09ff
 * Gurmukhi
 *  0a00 - 0a7f
 * Gujarati
 *  0a80 - 0aff
 * Oriya
 *  0b00 - 0b7f
 * Tamil
 *  0b80 - 0bff
 * Telugu
 *  0c00 - 0c7f
 * Kannada
 *  0c80 - 0cff
 * Malayalam
 *  0d00 - 0d7f
 * Sinhala
 *  0d80 - 0def
 * Lao
 *  0e80 - 0eff
 * Tibetan
 *  0f00 - 0fbf
 * Myanmar
 *  1000 - 109f
 * Georgian
 *  10a0 - 10ff
 * Ethiopic
 *  1200 - 137f
 * Cherokee
 *  13a0 - 13ff
 * Canadian Aboriginal Syllabics
 *  1400 - 167f
 * Ogham
 *  1680 - 169f
 * Runic
 *  16a0 - 16ff
 * Khmer
 *  1780 - 17ff
 * Mongolian
 *  1800 - 18af
 * Misc - superscripts and subscripts
 *  2070 - 209f
 * Misc - Combining Diacritical Marks for Symbols
 *  20d0 - 20ff
 * Misc - Arrows
 *  2190 - 21ff
 * Misc - Mathematical Operators
 *  2200 - 22ff
 * Misc - Miscellaneous Technical
 *  2300 - 23ff
 * Misc - Control Pictures
 *  2400 - 243f
 * Misc - Optical Character Recognition
 *  2440 - 245f
 * Misc - Enclosed Alphanumerics
 *  2460 - 24ff
 * Misc - Box Drawing
 *  2500 - 257f
 * Misc - Block Elements
 *  2580 - 259f
 * Misc - Geometric Shapes
 *  25a0 - 25ff
 * Misc - Miscellaneous Symbols
 *  2600 - 267f
 * Misc - Dingbats
 *  2700 - 27bf
 * Misc - Braille Patterns
 *  2800 - 28ff
 * Yi Syllables
 *  a000 - a48f
 * Yi radicals
 *  a490 - a4cf
 * Alphabetic Presentation Forms
 *  fb00 - fb4f
 * Misc - Combining half Marks
 *  fe20 - fe2f
 * Misc - small form variants
 *  fe50 - fe6f
 * Misc - Specials
 *  fff0 - ffff
 *********************************************************************/

// Dimensions of gUnicodeSubrangeTable: nine sub-tables, each holding one
// entry per possible value of a 4-bit nibble of the code point.
static const unsigned cNumSubTables = 9;
static const unsigned cSubTableSize = 16;

// Nibble-indexed lookup tables used by findCharUnicodeRange().
//
// Layout:
//   - Sub-table 0 is indexed by the most significant nibble of a 16-bit code
//     point (ch >> 12).
//   - Sub-tables 1-6 are indexed by the second nibble ((ch & 0x0f00) >> 8) for
//     the leading nibbles 0, 1, 2, a, d and f respectively.
//   - Sub-tables 7 and 8 are indexed by the third nibble ((ch & 0x00f0) >> 4)
//     for U+05xx and U+FFxx respectively.
//
// Entry encoding:
//   - A value below cRangeTableBase is a final range identifier.
//   - "cRangeTableBase + k" redirects the lookup to sub-table k.
//   - cRangeTertiaryTable redirects the lookup to gUnicodeTertiaryRangeTable.
//
// The position of every entry is significant; do not reorder.
static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
    {
        {
            // table for X---
            cRangeTableBase + 1,  // u0xxx
            cRangeTableBase + 2,  // u1xxx
            cRangeTableBase + 3,  // u2xxx
            cRangeSetCJK,         // u3xxx
            cRangeSetCJK,         // u4xxx
            cRangeSetCJK,         // u5xxx
            cRangeSetCJK,         // u6xxx
            cRangeSetCJK,         // u7xxx
            cRangeSetCJK,         // u8xxx
            cRangeSetCJK,         // u9xxx
            cRangeTableBase + 4,  // uaxxx
            cRangeKorean,         // ubxxx
            cRangeKorean,         // ucxxx
            cRangeTableBase + 5,  // udxxx
            cRangePrivate,        // uexxx
            cRangeTableBase + 6   // ufxxx
        },
        {
            // table for 0X--
            cRangeSetLatin,  // u00xx
            cRangeSetLatin,  // u01xx
            cRangeSetLatin,  // u02xx
            cRangeGreek,     // u03xx     XXX 0300-036f is in fact
                             // cRangeCombiningDiacriticalMarks
            cRangeCyrillic,  // u04xx
            cRangeTableBase +
                7,  // u05xx, includes Cyrillic supplement, Hebrew, and Armenian
            cRangeArabic,         // u06xx
            cRangeTertiaryTable,  // u07xx
            cRangeUnassigned,     // u08xx
            cRangeTertiaryTable,  // u09xx
            cRangeTertiaryTable,  // u0axx
            cRangeTertiaryTable,  // u0bxx
            cRangeTertiaryTable,  // u0cxx
            cRangeTertiaryTable,  // u0dxx
            cRangeTertiaryTable,  // u0exx
            cRangeTibetan,        // u0fxx
        },
        {
            // table for 1x--
            cRangeTertiaryTable,  // u10xx
            cRangeKorean,         // u11xx
            cRangeEthiopic,       // u12xx
            cRangeTertiaryTable,  // u13xx
            cRangeCanadian,       // u14xx
            cRangeCanadian,       // u15xx
            cRangeTertiaryTable,  // u16xx
            cRangeKhmer,          // u17xx
            cRangeMongolian,      // u18xx
            cRangeUnassigned,     // u19xx
            cRangeUnassigned,     // u1axx
            cRangeUnassigned,     // u1bxx
            cRangeUnassigned,     // u1cxx
            cRangeUnassigned,     // u1dxx
            cRangeSetLatin,       // u1exx
            cRangeGreek,          // u1fxx
        },
        {
            // table for 2x--
            cRangeSetLatin,               // u20xx
            cRangeSetLatin,               // u21xx
            cRangeMathOperators,          // u22xx
            cRangeMiscTechnical,          // u23xx
            cRangeControlOpticalEnclose,  // u24xx
            cRangeBoxBlockGeometrics,     // u25xx
            cRangeMiscSymbols,            // u26xx
            cRangeDingbats,               // u27xx
            cRangeBraillePattern,         // u28xx
            cRangeUnassigned,             // u29xx
            cRangeUnassigned,             // u2axx
            cRangeUnassigned,             // u2bxx
            cRangeUnassigned,             // u2cxx
            cRangeUnassigned,             // u2dxx
            cRangeSetCJK,                 // u2exx
            cRangeSetCJK,                 // u2fxx
        },
        {
            // table for ax--
            cRangeYi,          // ua0xx
            cRangeYi,          // ua1xx
            cRangeYi,          // ua2xx
            cRangeYi,          // ua3xx
            cRangeYi,          // ua4xx
            cRangeUnassigned,  // ua5xx
            cRangeUnassigned,  // ua6xx
            cRangeUnassigned,  // ua7xx
            cRangeUnassigned,  // ua8xx
            cRangeUnassigned,  // ua9xx
            cRangeUnassigned,  // uaaxx
            cRangeUnassigned,  // uabxx
            cRangeKorean,      // uacxx
            cRangeKorean,      // uadxx
            cRangeKorean,      // uaexx
            cRangeKorean,      // uafxx
        },
        {
            // table for dx--
            cRangeKorean,     // ud0xx
            cRangeKorean,     // ud1xx
            cRangeKorean,     // ud2xx
            cRangeKorean,     // ud3xx
            cRangeKorean,     // ud4xx
            cRangeKorean,     // ud5xx
            cRangeKorean,     // ud6xx
            cRangeKorean,     // ud7xx
            cRangeSurrogate,  // ud8xx
            cRangeSurrogate,  // ud9xx
            cRangeSurrogate,  // udaxx
            cRangeSurrogate,  // udbxx
            cRangeSurrogate,  // udcxx
            cRangeSurrogate,  // uddxx
            cRangeSurrogate,  // udexx
            cRangeSurrogate,  // udfxx
        },
        {
            // table for fx--
            cRangePrivate,  // uf0xx
            cRangePrivate,  // uf1xx
            cRangePrivate,  // uf2xx
            cRangePrivate,  // uf3xx
            cRangePrivate,  // uf4xx
            cRangePrivate,  // uf5xx
            cRangePrivate,  // uf6xx
            cRangePrivate,  // uf7xx
            cRangePrivate,  // uf8xx
            cRangeSetCJK,   // uf9xx
            cRangeSetCJK,   // ufaxx
            cRangeArabic,   // ufbxx, includes alphabetic presentation forms
            cRangeArabic,   // ufcxx
            cRangeArabic,   // ufdxx
            cRangeArabic,   // ufexx, includes Combining half marks,
            //                CJK compatibility forms,
            //                small form variants
            cRangeTableBase +
                8,  // uffxx, halfwidth and fullwidth forms, includes Specials
        },
        {
            // table for 0x0500 - 0x05ff
            cRangeCyrillic,  // u050x
            cRangeCyrillic,  // u051x
            cRangeCyrillic,  // u052x
            cRangeArmenian,  // u053x
            cRangeArmenian,  // u054x
            cRangeArmenian,  // u055x
            cRangeArmenian,  // u056x
            cRangeArmenian,  // u057x
            cRangeArmenian,  // u058x
            cRangeHebrew,    // u059x
            cRangeHebrew,    // u05ax
            cRangeHebrew,    // u05bx
            cRangeHebrew,    // u05cx
            cRangeHebrew,    // u05dx
            cRangeHebrew,    // u05ex
            cRangeHebrew,    // u05fx
        },
        {
            // table for 0xff00 - 0xffff
            cRangeSetCJK,    // uff0x, fullwidth latin
            cRangeSetCJK,    // uff1x, fullwidth latin
            cRangeSetCJK,    // uff2x, fullwidth latin
            cRangeSetCJK,    // uff3x, fullwidth latin
            cRangeSetCJK,    // uff4x, fullwidth latin
            cRangeSetCJK,    // uff5x, fullwidth latin
            cRangeSetCJK,    // uff6x, halfwidth katakana
            cRangeSetCJK,    // uff7x, halfwidth katakana
            cRangeSetCJK,    // uff8x, halfwidth katakana
            cRangeSetCJK,    // uff9x, halfwidth katakana
            cRangeSetCJK,    // uffax, halfwidth hangul jamo
            cRangeSetCJK,    // uffbx, halfwidth hangul jamo
            cRangeSetCJK,    // uffcx, halfwidth hangul jamo
            cRangeSetCJK,    // uffdx, halfwidth hangul jamo
            cRangeSetCJK,    // uffex, fullwidth symbols
            cRangeSpecials,  // ufffx, Specials
        },
};

// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
// code points, so the number of entries in the tertiary range table for that
// range is obtained by dividing (0x1700 - 0x0700) by 128, i.e. 32 entries.
// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian Aboriginal
// Syllabics take multiple chunks, and Ogham and Runic share a single chunk.
static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);

// Third-level lookup table covering U+0700 - U+16FF in 128-code-point chunks.
// findCharUnicodeRange() indexes it with (ch - 0x0700) >> 7, so entry i covers
// U+0700 + i * 0x80 through U+077F + i * 0x80. Entries marked "place holder"
// keep the index arithmetic uniform; per their comments those code points are
// already resolved by the secondary table and never reach this table.
static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] = {
    // table for 0x0700 - 0x16ff; each label below is the first code point
    // prefix of a 0x80-wide chunk
    cRangeSyriac,      // u070x
    cRangeThaana,      // u078x
    cRangeUnassigned,  // u080x  place holder(resolved in the 2ndary tab.)
    cRangeUnassigned,  // u088x  place holder(resolved in the 2ndary tab.)
    cRangeDevanagari,  // u090x
    cRangeBengali,     // u098x
    cRangeGurmukhi,    // u0a0x
    cRangeGujarati,    // u0a8x
    cRangeOriya,       // u0b0x
    cRangeTamil,       // u0b8x
    cRangeTelugu,      // u0c0x
    cRangeKannada,     // u0c8x
    cRangeMalayalam,   // u0d0x
    cRangeSinhala,     // u0d8x
    cRangeThai,        // u0e0x
    cRangeLao,         // u0e8x
    cRangeTibetan,     // u0f0x  place holder(resolved in the 2ndary tab.)
    cRangeTibetan,     // u0f8x  place holder(resolved in the 2ndary tab.)
    cRangeMyanmar,     // u100x
    cRangeGeorgian,    // u108x
    cRangeKorean,      // u110x  place holder(resolved in the 2ndary tab.)
    cRangeKorean,      // u118x  place holder(resolved in the 2ndary tab.)
    cRangeEthiopic,    // u120x  place holder(resolved in the 2ndary tab.)
    cRangeEthiopic,    // u128x  place holder(resolved in the 2ndary tab.)
    cRangeEthiopic,    // u130x
    cRangeCherokee,    // u138x
    cRangeCanadian,    // u140x  place holder(resolved in the 2ndary tab.)
    cRangeCanadian,    // u148x  place holder(resolved in the 2ndary tab.)
    cRangeCanadian,    // u150x  place holder(resolved in the 2ndary tab.)
    cRangeCanadian,    // u158x  place holder(resolved in the 2ndary tab.)
    cRangeCanadian,    // u160x
    cRangeOghamRunic,  // u168x  this contains two scripts, Ogham & Runic
};

// Returns the range identifier (one of the cRange* values stored in the
// lookup tables) for the code point |ch|.
//
// A two level index is almost enough for locating a range, with the
// exception of u03xx and u05xx. Since we don't really care about the range
// for combining diacritical marks in our font application, u03xx is not
// discriminated further; u05xx and uffxx are resolved through a third-level
// sub-table. Future adoption of this method for other uses should be aware
// of this limitation. The implementation can be extended if there is such a
// need.
// For Indic, Southeast Asian scripts and some other scripts between
// U+0700 and U+16FF, the lookup is extended to a third level using
// gUnicodeTertiaryRangeTable (128 code point blocks).
//
// Any |ch| outside [0, 0xFFFE] yields 0. Note that U+FFFF itself is included
// in that early return and is therefore not looked up in the tables.
unsigned findCharUnicodeRange(UChar32 ch) {
  // NOTE(review): UChar32 is presumably ICU's signed 32-bit type. Comparing as
  // unsigned rejects negative values as well as values >= 0xFFFF, so a
  // negative |ch| can never be used as a (negative) table index below.
  if (static_cast<unsigned>(ch) >= 0xFFFF)
    return 0;

  // First level: index by the most significant nibble of the 16-bit value.
  unsigned range = gUnicodeSubrangeTable[0][ch >> 12];
  if (range < cRangeTableBase) {
    // The entry is already a specific range.
    return range;
  }

  // Second level: the entry encodes a sub-table number; index that sub-table
  // by the second nibble.
  range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
  if (range < cRangeTableBase)
    return range;

  // Third level, nibble-indexed: the entry again encodes a sub-table number
  // (used for U+05xx and U+FFxx); index it by the third nibble.
  if (range < cRangeTertiaryTable)
    return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];

  // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
  return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
}

}  // namespace blink
