1435 lines
56 KiB
Plaintext
1435 lines
56 KiB
Plaintext
|
# This software is Copyright (c) 2012 magnum, and it is hereby
|
||
|
# released to the general public under the following terms:
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted.
|
||
|
#
|
||
|
# Generic implementation of "dumb" exhaustive search of FULL Unicode and
|
||
|
# an arbitrary charset. Default is to try *all* allocated characters (there are
|
||
|
# 109070 of them). Even if a fast format can exhaust two characters in one
|
||
|
# hour, three characters would take 12 years...
|
||
|
#
|
||
|
# The output is UTF-8, so for 16-bit formats you need to give --enc=utf8
|
||
|
[List.External:Dumb32]
|
||
|
int maxlength; // Maximum password length to try; assigned in init()
|
||
|
int last; // Last character position, zero-based
|
||
|
int lastid; // Character index in the last position (presumably an index into charset[] -- confirm)
|
||
|
int id[0x7f]; // Current character indices for other positions
|
||
|
int charset[0x20000], c0; // charset[]: code points to try, appended range by range in init(); c0: not used in the visible part of this section -- NOTE(review): confirm its role
|
||
|
int utf32[0x7F]; // Word in UTF32 (presumably one code point per position -- confirm against the code that fills it)
|
||
|
|
||
|
void init()
|
||
|
{
|
||
|
int minlength;
|
||
|
int i, c;
|
||
|
|
||
|
minlength = 1; // Initial password length to try, must be at least 1
|
||
|
maxlength = 2; // Must be at least same as minlength
|
||
|
|
||
|
/*
|
||
|
* This defines the character set. This is auto-generated from UnicodeData.txt
|
||
|
* and we skip control characters.
|
||
|
*/
|
||
|
i = 0;
|
||
|
c = 0x20; // from SPACE
|
||
|
while (c < 0x7f) // ..to TILDE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa0; // from NO-BREAK SPACE
|
||
|
while (c < 0x378) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x37a; // from GREEK YPOGEGRAMMENI
|
||
|
while (c < 0x37f) // ..to GREEK QUESTION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x384; // from GREEK TONOS
|
||
|
while (c < 0x38b) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||
|
while (c < 0x3a2) // ..to GREEK CAPITAL LETTER RHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
|
||
|
while (c < 0x528) // ..to CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
|
||
|
while (c < 0x557) // ..to ARMENIAN CAPITAL LETTER FEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||
|
while (c < 0x560) // ..to ARMENIAN ABBREVIATION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x561; // from ARMENIAN SMALL LETTER AYB
|
||
|
while (c < 0x588) // ..to ARMENIAN SMALL LIGATURE ECH YIWN
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x589; // ARMENIAN FULL STOP
|
||
|
charset[i++] = 0x58a; // ARMENIAN HYPHEN
|
||
|
c = 0x591; // from HEBREW ACCENT ETNAHTA
|
||
|
while (c < 0x5c8) // ..to HEBREW POINT QAMATS QATAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x5d0; // from HEBREW LETTER ALEF
|
||
|
while (c < 0x5eb) // ..to HEBREW LETTER TAV
|
||
|
charset[i++] = c++;
|
||
|
c = 0x5f0; // from HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||
|
while (c < 0x5f5) // ..to HEBREW PUNCTUATION GERSHAYIM
|
||
|
charset[i++] = c++;
|
||
|
c = 0x600; // from ARABIC NUMBER SIGN
|
||
|
while (c < 0x604) // ..to ARABIC SIGN SAFHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x606; // from ARABIC-INDIC CUBE ROOT
|
||
|
while (c < 0x61c) // ..to ARABIC SEMICOLON
|
||
|
charset[i++] = c++;
|
||
|
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
|
||
|
while (c < 0x70e) // ..to SYRIAC HARKLEAN ASTERISCUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x70f; // from SYRIAC ABBREVIATION MARK
|
||
|
while (c < 0x74b) // ..to SYRIAC BARREKH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x74d; // from SYRIAC LETTER SOGDIAN ZHAIN
|
||
|
while (c < 0x7b2) // ..to THAANA LETTER NAA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x7c0; // from NKO DIGIT ZERO
|
||
|
while (c < 0x7fb) // ..to NKO LAJANYALAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x800; // from SAMARITAN LETTER ALAF
|
||
|
while (c < 0x82e) // ..to SAMARITAN MARK NEQUDAA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
|
||
|
while (c < 0x83f) // ..to SAMARITAN PUNCTUATION ANNAAU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x840; // from MANDAIC LETTER HALQA
|
||
|
while (c < 0x85c) // ..to MANDAIC GEMINATION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
|
||
|
while (c < 0x978) // ..to DEVANAGARI LETTER UUE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x979; // from DEVANAGARI LETTER ZHA
|
||
|
while (c < 0x980) // ..to DEVANAGARI LETTER BBA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x981; // BENGALI SIGN CANDRABINDU
|
||
|
charset[i++] = 0x983; // BENGALI SIGN VISARGA
|
||
|
c = 0x985; // from BENGALI LETTER A
|
||
|
while (c < 0x98d) // ..to BENGALI LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x98f; // BENGALI LETTER E
|
||
|
charset[i++] = 0x990; // BENGALI LETTER AI
|
||
|
c = 0x993; // from BENGALI LETTER O
|
||
|
while (c < 0x9a9) // ..to BENGALI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9aa; // from BENGALI LETTER PA
|
||
|
while (c < 0x9b1) // ..to BENGALI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9b6; // from BENGALI LETTER SHA
|
||
|
while (c < 0x9ba) // ..to BENGALI LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9bc; // from BENGALI SIGN NUKTA
|
||
|
while (c < 0x9c5) // ..to BENGALI VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
|
||
|
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
|
||
|
c = 0x9cb; // from BENGALI VOWEL SIGN O
|
||
|
while (c < 0x9cf) // ..to BENGALI LETTER KHANDA TA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x9dc; // BENGALI LETTER RRA
|
||
|
charset[i++] = 0x9dd; // BENGALI LETTER RHA
|
||
|
c = 0x9df; // from BENGALI LETTER YYA
|
||
|
while (c < 0x9e4) // ..to BENGALI VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9e6; // from BENGALI DIGIT ZERO
|
||
|
while (c < 0x9fc) // ..to BENGALI GANDA MARK
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
|
||
|
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
|
||
|
c = 0xa05; // from GURMUKHI LETTER A
|
||
|
while (c < 0xa0b) // ..to GURMUKHI LETTER UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
|
||
|
charset[i++] = 0xa10; // GURMUKHI LETTER AI
|
||
|
c = 0xa13; // from GURMUKHI LETTER OO
|
||
|
while (c < 0xa29) // ..to GURMUKHI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa2a; // from GURMUKHI LETTER PA
|
||
|
while (c < 0xa31) // ..to GURMUKHI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa32; // GURMUKHI LETTER LA
|
||
|
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
|
||
|
charset[i++] = 0xa35; // GURMUKHI LETTER VA
|
||
|
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
|
||
|
charset[i++] = 0xa38; // GURMUKHI LETTER SA
|
||
|
charset[i++] = 0xa39; // GURMUKHI LETTER HA
|
||
|
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
|
||
|
while (c < 0xa43) // ..to GURMUKHI VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
|
||
|
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
|
||
|
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
|
||
|
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
|
||
|
c = 0xa59; // from GURMUKHI LETTER KHHA
|
||
|
while (c < 0xa5d) // ..to GURMUKHI LETTER RRA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa66; // from GURMUKHI DIGIT ZERO
|
||
|
while (c < 0xa76) // ..to GURMUKHI SIGN YAKASH
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
|
||
|
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
|
||
|
c = 0xa85; // from GUJARATI LETTER A
|
||
|
while (c < 0xa8e) // ..to GUJARATI VOWEL CANDRA E
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa8f; // GUJARATI LETTER E
|
||
|
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
|
||
|
c = 0xa93; // from GUJARATI LETTER O
|
||
|
while (c < 0xaa9) // ..to GUJARATI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaaa; // from GUJARATI LETTER PA
|
||
|
while (c < 0xab1) // ..to GUJARATI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xab2; // GUJARATI LETTER LA
|
||
|
charset[i++] = 0xab3; // GUJARATI LETTER LLA
|
||
|
c = 0xab5; // from GUJARATI LETTER VA
|
||
|
while (c < 0xaba) // ..to GUJARATI LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabc; // from GUJARATI SIGN NUKTA
|
||
|
while (c < 0xac6) // ..to GUJARATI VOWEL SIGN CANDRA E
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
|
||
|
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
|
||
|
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
|
||
|
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
|
||
|
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
|
||
|
while (c < 0xae4) // ..to GUJARATI VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xae6; // from GUJARATI DIGIT ZERO
|
||
|
while (c < 0xaf0) // ..to GUJARATI DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
|
||
|
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
|
||
|
c = 0xb05; // from ORIYA LETTER A
|
||
|
while (c < 0xb0d) // ..to ORIYA LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb0f; // ORIYA LETTER E
|
||
|
charset[i++] = 0xb10; // ORIYA LETTER AI
|
||
|
c = 0xb13; // from ORIYA LETTER O
|
||
|
while (c < 0xb29) // ..to ORIYA LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb2a; // from ORIYA LETTER PA
|
||
|
while (c < 0xb31) // ..to ORIYA LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb32; // ORIYA LETTER LA
|
||
|
charset[i++] = 0xb33; // ORIYA LETTER LLA
|
||
|
c = 0xb35; // from ORIYA LETTER VA
|
||
|
while (c < 0xb3a) // ..to ORIYA LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb3c; // from ORIYA SIGN NUKTA
|
||
|
while (c < 0xb45) // ..to ORIYA VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
|
||
|
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
|
||
|
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
|
||
|
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
|
||
|
charset[i++] = 0xb56; // ORIYA AI LENGTH MARK
|
||
|
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
|
||
|
charset[i++] = 0xb5c; // ORIYA LETTER RRA
|
||
|
charset[i++] = 0xb5d; // ORIYA LETTER RHA
|
||
|
c = 0xb5f; // from ORIYA LETTER YYA
|
||
|
while (c < 0xb64) // ..to ORIYA VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb66; // from ORIYA DIGIT ZERO
|
||
|
while (c < 0xb78) // ..to ORIYA FRACTION THREE SIXTEENTHS
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
|
||
|
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
|
||
|
c = 0xb85; // from TAMIL LETTER A
|
||
|
while (c < 0xb8b) // ..to TAMIL LETTER UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb8e; // TAMIL LETTER E
|
||
|
charset[i++] = 0xb90; // TAMIL LETTER AI
|
||
|
c = 0xb92; // from TAMIL LETTER O
|
||
|
while (c < 0xb96) // ..to TAMIL LETTER KA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb99; // TAMIL LETTER NGA
|
||
|
charset[i++] = 0xb9a; // TAMIL LETTER CA
|
||
|
charset[i++] = 0xb9e; // TAMIL LETTER NYA
|
||
|
charset[i++] = 0xb9f; // TAMIL LETTER TTA
|
||
|
charset[i++] = 0xba3; // TAMIL LETTER NNA
|
||
|
charset[i++] = 0xba4; // TAMIL LETTER TA
|
||
|
charset[i++] = 0xba8; // TAMIL LETTER NA
|
||
|
charset[i++] = 0xbaa; // TAMIL LETTER PA
|
||
|
c = 0xbae; // from TAMIL LETTER MA
|
||
|
while (c < 0xbba) // ..to TAMIL LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xbbe; // from TAMIL VOWEL SIGN AA
|
||
|
while (c < 0xbc3) // ..to TAMIL VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
|
||
|
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
|
||
|
c = 0xbca; // from TAMIL VOWEL SIGN O
|
||
|
while (c < 0xbce) // ..to TAMIL SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xbe6; // from TAMIL DIGIT ZERO
|
||
|
while (c < 0xbfb) // ..to TAMIL NUMBER SIGN
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc01; // TELUGU SIGN CANDRABINDU
|
||
|
charset[i++] = 0xc03; // TELUGU SIGN VISARGA
|
||
|
c = 0xc05; // from TELUGU LETTER A
|
||
|
while (c < 0xc0d) // ..to TELUGU LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc0e; // TELUGU LETTER E
|
||
|
charset[i++] = 0xc10; // TELUGU LETTER AI
|
||
|
c = 0xc12; // from TELUGU LETTER O
|
||
|
while (c < 0xc29) // ..to TELUGU LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc2a; // from TELUGU LETTER PA
|
||
|
while (c < 0xc34) // ..to TELUGU LETTER LLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc35; // from TELUGU LETTER VA
|
||
|
while (c < 0xc3a) // ..to TELUGU LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
|
||
|
while (c < 0xc45) // ..to TELUGU VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
|
||
|
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
|
||
|
c = 0xc4a; // from TELUGU VOWEL SIGN O
|
||
|
while (c < 0xc4e) // ..to TELUGU SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc55; // TELUGU LENGTH MARK
|
||
|
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
|
||
|
charset[i++] = 0xc58; // TELUGU LETTER TSA
|
||
|
charset[i++] = 0xc59; // TELUGU LETTER DZA
|
||
|
c = 0xc60; // from TELUGU LETTER VOCALIC RR
|
||
|
while (c < 0xc64) // ..to TELUGU VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc66; // from TELUGU DIGIT ZERO
|
||
|
while (c < 0xc70) // ..to TELUGU DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc78; // from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR
|
||
|
while (c < 0xc80) // ..to TELUGU SIGN TUUMU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc82; // KANNADA SIGN ANUSVARA
|
||
|
charset[i++] = 0xc83; // KANNADA SIGN VISARGA
|
||
|
c = 0xc85; // from KANNADA LETTER A
|
||
|
while (c < 0xc8d) // ..to KANNADA LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc8e; // KANNADA LETTER E
|
||
|
charset[i++] = 0xc90; // KANNADA LETTER AI
|
||
|
c = 0xc92; // from KANNADA LETTER O
|
||
|
while (c < 0xca9) // ..to KANNADA LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcaa; // from KANNADA LETTER PA
|
||
|
while (c < 0xcb4) // ..to KANNADA LETTER LLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcb5; // from KANNADA LETTER VA
|
||
|
while (c < 0xcba) // ..to KANNADA LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcbc; // from KANNADA SIGN NUKTA
|
||
|
while (c < 0xcc5) // ..to KANNADA VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
|
||
|
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
|
||
|
c = 0xcca; // from KANNADA VOWEL SIGN O
|
||
|
while (c < 0xcce) // ..to KANNADA SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
|
||
|
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
|
||
|
c = 0xce0; // from KANNADA LETTER VOCALIC RR
|
||
|
while (c < 0xce4) // ..to KANNADA VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xce6; // from KANNADA DIGIT ZERO
|
||
|
while (c < 0xcf0) // ..to KANNADA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
|
||
|
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
|
||
|
charset[i++] = 0xd02; // MALAYALAM SIGN ANUSVARA
|
||
|
charset[i++] = 0xd03; // MALAYALAM SIGN VISARGA
|
||
|
c = 0xd05; // from MALAYALAM LETTER A
|
||
|
while (c < 0xd0d) // ..to MALAYALAM LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd0e; // MALAYALAM LETTER E
|
||
|
charset[i++] = 0xd10; // MALAYALAM LETTER AI
|
||
|
c = 0xd12; // from MALAYALAM LETTER O
|
||
|
while (c < 0xd3b) // ..to MALAYALAM LETTER TTTA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd3d; // from MALAYALAM SIGN AVAGRAHA
|
||
|
while (c < 0xd45) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
|
||
|
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
|
||
|
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
|
||
|
while (c < 0xd4f) // ..to MALAYALAM LETTER DOT REPH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd60; // from MALAYALAM LETTER VOCALIC RR
|
||
|
while (c < 0xd64) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd66; // from MALAYALAM DIGIT ZERO
|
||
|
while (c < 0xd76) // ..to MALAYALAM FRACTION THREE QUARTERS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd79; // from MALAYALAM DATE MARK
|
||
|
while (c < 0xd80) // ..to MALAYALAM LETTER CHILLU K
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA
|
||
|
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
|
||
|
c = 0xd85; // from SINHALA LETTER AYANNA
|
||
|
while (c < 0xd97) // ..to SINHALA LETTER AUYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
|
||
|
while (c < 0xdb2) // ..to SINHALA LETTER DANTAJA NAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
|
||
|
while (c < 0xdbc) // ..to SINHALA LETTER RAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdc0; // from SINHALA LETTER VAYANNA
|
||
|
while (c < 0xdc7) // ..to SINHALA LETTER FAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
|
||
|
while (c < 0xdd5) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
|
||
|
while (c < 0xde0) // ..to SINHALA VOWEL SIGN GAYANUKITTA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
|
||
|
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
|
||
|
c = 0xe01; // from THAI CHARACTER KO KAI
|
||
|
while (c < 0xe3b) // ..to THAI CHARACTER PHINTHU
|
||
|
charset[i++] = c++;
|
||
|
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
|
||
|
while (c < 0xe5c) // ..to THAI CHARACTER KHOMUT
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xe81; // LAO LETTER KO
|
||
|
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
|
||
|
charset[i++] = 0xe87; // LAO LETTER NGO
|
||
|
charset[i++] = 0xe88; // LAO LETTER CO
|
||
|
c = 0xe94; // from LAO LETTER DO
|
||
|
while (c < 0xe98) // ..to LAO LETTER THO TAM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xe99; // from LAO LETTER NO
|
||
|
while (c < 0xea0) // ..to LAO LETTER FO SUNG
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xea1; // LAO LETTER MO
|
||
|
charset[i++] = 0xea3; // LAO LETTER LO LING
|
||
|
charset[i++] = 0xeaa; // LAO LETTER SO SUNG
|
||
|
charset[i++] = 0xeab; // LAO LETTER HO SUNG
|
||
|
c = 0xead; // from LAO LETTER O
|
||
|
while (c < 0xeba) // ..to LAO VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON
|
||
|
charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO
|
||
|
c = 0xec0; // from LAO VOWEL SIGN E
|
||
|
while (c < 0xec5) // ..to LAO VOWEL SIGN AI
|
||
|
charset[i++] = c++;
|
||
|
c = 0xec8; // from LAO TONE MAI EK
|
||
|
while (c < 0xece) // ..to LAO NIGGAHITA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xed0; // from LAO DIGIT ZERO
|
||
|
while (c < 0xeda) // ..to LAO DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xedc; // LAO HO NO
|
||
|
charset[i++] = 0xedd; // LAO HO MO
|
||
|
c = 0xf00; // from TIBETAN SYLLABLE OM
|
||
|
while (c < 0xf48) // ..to TIBETAN LETTER JA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf49; // from TIBETAN LETTER NYA
|
||
|
while (c < 0xf6d) // ..to TIBETAN LETTER RRA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf71; // from TIBETAN VOWEL SIGN AA
|
||
|
while (c < 0xf98) // ..to TIBETAN SUBJOINED LETTER JA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
|
||
|
while (c < 0xfbd) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfbe; // from TIBETAN KU RU KHA
|
||
|
while (c < 0xfcd) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
|
||
|
while (c < 0xfdb) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1000; // from MYANMAR LETTER KA
|
||
|
while (c < 0x10c6) // ..to GEORGIAN CAPITAL LETTER HOE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10d0; // from GEORGIAN LETTER AN
|
||
|
while (c < 0x10fd) // ..to MODIFIER LETTER GEORGIAN NAR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
|
||
|
while (c < 0x1249) // ..to ETHIOPIC SYLLABLE QWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
|
||
|
while (c < 0x124e) // ..to ETHIOPIC SYLLABLE QWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
|
||
|
while (c < 0x1257) // ..to ETHIOPIC SYLLABLE QHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
|
||
|
while (c < 0x125e) // ..to ETHIOPIC SYLLABLE QHWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1260; // from ETHIOPIC SYLLABLE BA
|
||
|
while (c < 0x1289) // ..to ETHIOPIC SYLLABLE XWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
|
||
|
while (c < 0x128e) // ..to ETHIOPIC SYLLABLE XWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1290; // from ETHIOPIC SYLLABLE NA
|
||
|
while (c < 0x12b1) // ..to ETHIOPIC SYLLABLE KWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
|
||
|
while (c < 0x12b6) // ..to ETHIOPIC SYLLABLE KWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
|
||
|
while (c < 0x12bf) // ..to ETHIOPIC SYLLABLE KXO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
|
||
|
while (c < 0x12c6) // ..to ETHIOPIC SYLLABLE KXWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
|
||
|
while (c < 0x12d7) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
|
||
|
while (c < 0x1311) // ..to ETHIOPIC SYLLABLE GWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
|
||
|
while (c < 0x1316) // ..to ETHIOPIC SYLLABLE GWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
|
||
|
while (c < 0x135b) // ..to ETHIOPIC SYLLABLE FYA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
|
||
|
while (c < 0x137d) // ..to ETHIOPIC NUMBER TEN THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
|
||
|
while (c < 0x139a) // ..to ETHIOPIC TONAL MARK KURT
|
||
|
charset[i++] = c++;
|
||
|
c = 0x13a0; // from CHEROKEE LETTER A
|
||
|
while (c < 0x13f5) // ..to CHEROKEE LETTER YV
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
|
||
|
while (c < 0x169d) // ..to OGHAM REVERSED FEATHER MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
|
||
|
while (c < 0x16f1) // ..to RUNIC BELGTHOR SYMBOL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1700; // from TAGALOG LETTER A
|
||
|
while (c < 0x170d) // ..to TAGALOG LETTER YA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x170e; // from TAGALOG LETTER LA
|
||
|
while (c < 0x1715) // ..to TAGALOG SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1720; // from HANUNOO LETTER A
|
||
|
while (c < 0x1737) // ..to PHILIPPINE DOUBLE PUNCTUATION
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1740; // from BUHID LETTER A
|
||
|
while (c < 0x1754) // ..to BUHID VOWEL SIGN U
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1760; // from TAGBANWA LETTER A
|
||
|
while (c < 0x176d) // ..to TAGBANWA LETTER YA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x176e; // TAGBANWA LETTER LA
|
||
|
charset[i++] = 0x1770; // TAGBANWA LETTER SA
|
||
|
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
|
||
|
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
|
||
|
c = 0x1780; // from KHMER LETTER KA
|
||
|
while (c < 0x17de) // ..to KHMER SIGN ATTHACAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x17e0; // from KHMER DIGIT ZERO
|
||
|
while (c < 0x17ea) // ..to KHMER DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
|
||
|
while (c < 0x17fa) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1800; // from MONGOLIAN BIRGA
|
||
|
while (c < 0x180f) // ..to MONGOLIAN VOWEL SEPARATOR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1810; // from MONGOLIAN DIGIT ZERO
|
||
|
while (c < 0x181a) // ..to MONGOLIAN DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1820; // from MONGOLIAN LETTER A
|
||
|
while (c < 0x1878) // ..to MONGOLIAN LETTER MANCHU ZHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
|
||
|
while (c < 0x18ab) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x18b0; // from CANADIAN SYLLABICS OY
|
||
|
while (c < 0x18f6) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
|
||
|
while (c < 0x191d) // ..to LIMBU LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1920; // from LIMBU VOWEL SIGN A
|
||
|
while (c < 0x192c) // ..to LIMBU SUBJOINED LETTER WA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1930; // from LIMBU SMALL LETTER KA
|
||
|
while (c < 0x193c) // ..to LIMBU SIGN SA-I
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1944; // from LIMBU EXCLAMATION MARK
|
||
|
while (c < 0x196e) // ..to TAI LE LETTER AI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1970; // from TAI LE LETTER TONE-2
|
||
|
while (c < 0x1975) // ..to TAI LE LETTER TONE-6
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
|
||
|
while (c < 0x19ac) // ..to NEW TAI LUE LETTER LOW SUA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
|
||
|
while (c < 0x19ca) // ..to NEW TAI LUE TONE MARK-2
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
|
||
|
while (c < 0x19db) // ..to NEW TAI LUE THAM DIGIT ONE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19de; // from NEW TAI LUE SIGN LAE
|
||
|
while (c < 0x1a1c) // ..to BUGINESE VOWEL SIGN AE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a1e; // from BUGINESE PALLAWA
|
||
|
while (c < 0x1a5f) // ..to TAI THAM CONSONANT SIGN SA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a60; // from TAI THAM SIGN SAKOT
|
||
|
while (c < 0x1a7d) // ..to TAI THAM SIGN KHUEN-LUE KARAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||
|
while (c < 0x1a8a) // ..to TAI THAM HORA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
|
||
|
while (c < 0x1a9a) // ..to TAI THAM THAM DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1aa0; // from TAI THAM SIGN WIANG
|
||
|
while (c < 0x1aae) // ..to TAI THAM SIGN CAANG
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b00; // from BALINESE SIGN ULU RICEM
|
||
|
while (c < 0x1b4c) // ..to BALINESE LETTER ASYURA SASAK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b50; // from BALINESE DIGIT ZERO
|
||
|
while (c < 0x1b7d) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
|
||
|
while (c < 0x1bab) // ..to SUNDANESE SIGN PAMAAEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bae; // from SUNDANESE LETTER KHA
|
||
|
while (c < 0x1bba) // ..to SUNDANESE DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bc0; // from BATAK LETTER A
|
||
|
while (c < 0x1bf4) // ..to BATAK PANONGONAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
|
||
|
while (c < 0x1c38) // ..to LEPCHA SIGN NUKTA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
|
||
|
while (c < 0x1c4a) // ..to LEPCHA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1c4d; // from LEPCHA LETTER TTA
|
||
|
while (c < 0x1c80) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1cd0; // from VEDIC TONE KARSHANA
|
||
|
while (c < 0x1cf3) // ..to VEDIC SIGN ARDHAVISARGA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
|
||
|
while (c < 0x1de7) // ..to COMBINING LATIN SMALL LETTER Z
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1dfc; // from COMBINING DOUBLE INVERTED BREVE BELOW
|
||
|
while (c < 0x1f16) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
|
||
|
while (c < 0x1f1e) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
|
||
|
while (c < 0x1f46) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
|
||
|
while (c < 0x1f4e) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
|
||
|
while (c < 0x1f58) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||
|
while (c < 0x1f7e) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||
|
while (c < 0x1fb5) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||
|
while (c < 0x1fc5) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||
|
while (c < 0x1fd4) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||
|
while (c < 0x1fdc) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fdd; // from GREEK DASIA AND VARIA
|
||
|
while (c < 0x1ff0) // ..to GREEK VARIA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||
|
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||
|
while (c < 0x1fff) // ..to GREEK DASIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2000; // from EN QUAD
|
||
|
while (c < 0x2065) // ..to INVISIBLE PLUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x206a; // from INHIBIT SYMMETRIC SWAPPING
|
||
|
while (c < 0x2072) // ..to SUPERSCRIPT LATIN SMALL LETTER I
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2074; // from SUPERSCRIPT FOUR
|
||
|
while (c < 0x208f) // ..to SUBSCRIPT RIGHT PARENTHESIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
|
||
|
while (c < 0x209d) // ..to LATIN SUBSCRIPT SMALL LETTER T
|
||
|
charset[i++] = c++;
|
||
|
c = 0x20a0; // from EURO-CURRENCY SIGN
|
||
|
while (c < 0x20ba) // ..to INDIAN RUPEE SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
|
||
|
while (c < 0x20f1) // ..to COMBINING ASTERISK ABOVE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2100; // from ACCOUNT OF
|
||
|
while (c < 0x218a) // ..to VULGAR FRACTION ZERO THIRDS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2190; // from LEFTWARDS ARROW
|
||
|
while (c < 0x23f4) // ..to HOURGLASS WITH FLOWING SAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2400; // from SYMBOL FOR NULL
|
||
|
while (c < 0x2427) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2440; // from OCR HOOK
|
||
|
while (c < 0x244b) // ..to OCR DOUBLE BACKSLASH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2460; // from CIRCLED DIGIT ONE
|
||
|
while (c < 0x2700) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2701; // from UPPER BLADE SCISSORS
|
||
|
while (c < 0x27cb) // ..to VERTICAL BAR WITH HORIZONTAL STROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x27ce; // from SQUARED LOGICAL AND
|
||
|
while (c < 0x2b4d) // ..to RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2b50; // from WHITE MEDIUM STAR
|
||
|
while (c < 0x2b5a) // ..to HEAVY CIRCLED SALTIRE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
|
||
|
while (c < 0x2c2f) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
|
||
|
while (c < 0x2c5f) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
|
||
|
while (c < 0x2cf2) // ..to COPTIC COMBINING SPIRITUS LENIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
|
||
|
while (c < 0x2d26) // ..to GEORGIAN SMALL LETTER HOE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2d30; // from TIFINAGH LETTER YA
|
||
|
while (c < 0x2d66) // ..to TIFINAGH LETTER YAZZ
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||
|
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
|
||
|
c = 0x2d7f; // from TIFINAGH CONSONANT JOINER
|
||
|
while (c < 0x2d97) // ..to ETHIOPIC SYLLABLE GGWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
|
||
|
while (c < 0x2da7) // ..to ETHIOPIC SYLLABLE SSO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
|
||
|
while (c < 0x2daf) // ..to ETHIOPIC SYLLABLE CCO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
|
||
|
while (c < 0x2db7) // ..to ETHIOPIC SYLLABLE ZZO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
|
||
|
while (c < 0x2dbf) // ..to ETHIOPIC SYLLABLE CCHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
|
||
|
while (c < 0x2dc7) // ..to ETHIOPIC SYLLABLE QYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
|
||
|
while (c < 0x2dcf) // ..to ETHIOPIC SYLLABLE KYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
|
||
|
while (c < 0x2dd7) // ..to ETHIOPIC SYLLABLE XYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
|
||
|
while (c < 0x2ddf) // ..to ETHIOPIC SYLLABLE GYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
|
||
|
while (c < 0x2e32) // ..to WORD SEPARATOR MIDDLE DOT
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2e80; // from CJK RADICAL REPEAT
|
||
|
while (c < 0x2e9a) // ..to CJK RADICAL RAP
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2e9b; // from CJK RADICAL CHOKE
|
||
|
while (c < 0x2ef4) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2f00; // from KANGXI RADICAL ONE
|
||
|
while (c < 0x2fd6) // ..to KANGXI RADICAL FLUTE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
|
||
|
while (c < 0x2ffc) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3000; // from IDEOGRAPHIC SPACE
|
||
|
while (c < 0x3040) // ..to IDEOGRAPHIC HALF FILL SPACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3041; // from HIRAGANA LETTER SMALL A
|
||
|
while (c < 0x3097) // ..to HIRAGANA LETTER SMALL KE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
|
||
|
while (c < 0x3100) // ..to KATAKANA DIGRAPH KOTO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3105; // from BOPOMOFO LETTER B
|
||
|
while (c < 0x312e) // ..to BOPOMOFO LETTER IH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3131; // from HANGUL LETTER KIYEOK
|
||
|
while (c < 0x318f) // ..to HANGUL LETTER ARAEAE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK
|
||
|
while (c < 0x31bb) // ..to BOPOMOFO LETTER ZY
|
||
|
charset[i++] = c++;
|
||
|
c = 0x31c0; // from CJK STROKE T
|
||
|
while (c < 0x31e4) // ..to CJK STROKE Q
|
||
|
charset[i++] = c++;
|
||
|
c = 0x31f0; // from KATAKANA LETTER SMALL KU
|
||
|
while (c < 0x321f) // ..to PARENTHESIZED KOREAN CHARACTER O HU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE
|
||
|
while (c < 0x32ff) // ..to CIRCLED KATAKANA WO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3300; // from SQUARE APAATO
|
||
|
while (c < 0x3400) // ..to SQUARE GAL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3400; // from <CJK Ideograph Extension A, First>
|
||
|
while (c < 0x4db6) // ..to <CJK Ideograph Extension A, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN
|
||
|
while (c < 0x4e00) // ..to HEXAGRAM FOR BEFORE COMPLETION
|
||
|
charset[i++] = c++;
|
||
|
c = 0x4e00; // from <CJK Ideograph, First>
|
||
|
while (c < 0x9fcc) // ..to <CJK Ideograph, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa000; // from YI SYLLABLE IT
|
||
|
while (c < 0xa48d) // ..to YI SYLLABLE YYR
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa490; // from YI RADICAL QOT
|
||
|
while (c < 0xa4c7) // ..to YI RADICAL KE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa4d0; // from LISU LETTER BA
|
||
|
while (c < 0xa62c) // ..to VAI SYLLABLE NDOLE DO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA
|
||
|
while (c < 0xa674) // ..to SLAVONIC ASTERISK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa67c; // from COMBINING CYRILLIC KAVYKA
|
||
|
while (c < 0xa698) // ..to CYRILLIC SMALL LETTER SHWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa6a0; // from BAMUM LETTER A
|
||
|
while (c < 0xa6f8) // ..to BAMUM QUESTION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING
|
||
|
while (c < 0xa78f) // ..to LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa790; // LATIN CAPITAL LETTER N WITH DESCENDER
|
||
|
charset[i++] = 0xa791; // LATIN SMALL LETTER N WITH DESCENDER
|
||
|
c = 0xa7a0; // from LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
|
||
|
while (c < 0xa7aa) // ..to LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa7fa; // from LATIN LETTER SMALL CAPITAL TURNED M
|
||
|
while (c < 0xa82c) // ..to SYLOTI NAGRI POETRY MARK-4
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER
|
||
|
while (c < 0xa83a) // ..to NORTH INDIC QUANTITY MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa840; // from PHAGS-PA LETTER KA
|
||
|
while (c < 0xa878) // ..to PHAGS-PA MARK DOUBLE SHAD
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa880; // from SAURASHTRA SIGN ANUSVARA
|
||
|
while (c < 0xa8c5) // ..to SAURASHTRA SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa8ce; // from SAURASHTRA DANDA
|
||
|
while (c < 0xa8da) // ..to SAURASHTRA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO
|
||
|
while (c < 0xa8fc) // ..to DEVANAGARI HEADSTROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa900; // from KAYAH LI DIGIT ZERO
|
||
|
while (c < 0xa954) // ..to REJANG VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa95f; // from REJANG SECTION MARK
|
||
|
while (c < 0xa97d) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa980; // from JAVANESE SIGN PANYANGGA
|
||
|
while (c < 0xa9ce) // ..to JAVANESE TURNED PADA PISELEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa9cf; // from JAVANESE PANGRANGKEP
|
||
|
while (c < 0xa9da) // ..to JAVANESE DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES
|
||
|
charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN
|
||
|
c = 0xaa00; // from CHAM LETTER A
|
||
|
while (c < 0xaa37) // ..to CHAM CONSONANT SIGN WA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa40; // from CHAM LETTER FINAL K
|
||
|
while (c < 0xaa4e) // ..to CHAM CONSONANT SIGN FINAL H
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa50; // from CHAM DIGIT ZERO
|
||
|
while (c < 0xaa5a) // ..to CHAM DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL
|
||
|
while (c < 0xaa7c) // ..to MYANMAR SIGN PAO KAREN TONE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa80; // from TAI VIET LETTER LOW KO
|
||
|
while (c < 0xaac3) // ..to TAI VIET TONE MAI SONG
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaadb; // from TAI VIET SYMBOL KON
|
||
|
while (c < 0xaae0) // ..to TAI VIET SYMBOL KOI KOI
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab01; // from ETHIOPIC SYLLABLE TTHU
|
||
|
while (c < 0xab07) // ..to ETHIOPIC SYLLABLE TTHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab09; // from ETHIOPIC SYLLABLE DDHU
|
||
|
while (c < 0xab0f) // ..to ETHIOPIC SYLLABLE DDHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab11; // from ETHIOPIC SYLLABLE DZU
|
||
|
while (c < 0xab17) // ..to ETHIOPIC SYLLABLE DZO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA
|
||
|
while (c < 0xab27) // ..to ETHIOPIC SYLLABLE CCHHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab28; // from ETHIOPIC SYLLABLE BBA
|
||
|
while (c < 0xab2f) // ..to ETHIOPIC SYLLABLE BBO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabc0; // from MEETEI MAYEK LETTER KOK
|
||
|
while (c < 0xabee) // ..to MEETEI MAYEK APUN IYEK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO
|
||
|
while (c < 0xabfa) // ..to MEETEI MAYEK DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xac00; // from <Hangul Syllable, First>
|
||
|
while (c < 0xd7a4) // ..to <Hangul Syllable, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO
|
||
|
while (c < 0xd7c7) // ..to HANGUL JUNGSEONG ARAEA-E
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL
|
||
|
while (c < 0xd7fc) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900
|
||
|
while (c < 0xfa2e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfa30; // from CJK COMPATIBILITY IDEOGRAPH-FA30
|
||
|
while (c < 0xfa6e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70
|
||
|
while (c < 0xfada) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb00; // from LATIN SMALL LIGATURE FF
|
||
|
while (c < 0xfb07) // ..to LATIN SMALL LIGATURE ST
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW
|
||
|
while (c < 0xfb18) // ..to ARMENIAN SMALL LIGATURE MEN XEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ
|
||
|
while (c < 0xfb37) // ..to HEBREW LETTER ZAYIN WITH DAGESH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH
|
||
|
while (c < 0xfb3d) // ..to HEBREW LETTER LAMED WITH DAGESH
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH
|
||
|
charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH
|
||
|
charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH
|
||
|
charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH
|
||
|
c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH
|
||
|
while (c < 0xfbc2) // ..to ARABIC SYMBOL SMALL TAH BELOW
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
|
||
|
while (c < 0xfd40) // ..to ORNATE RIGHT PARENTHESIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
|
||
|
while (c < 0xfd90) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
|
||
|
while (c < 0xfdc8) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
|
||
|
while (c < 0xfdfe) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe00; // from VARIATION SELECTOR-1
|
||
|
while (c < 0xfe1a) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe20; // from COMBINING LIGATURE LEFT HALF
|
||
|
while (c < 0xfe27) // ..to COMBINING CONJOINING MACRON
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
|
||
|
while (c < 0xfe53) // ..to SMALL FULL STOP
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe54; // from SMALL SEMICOLON
|
||
|
while (c < 0xfe67) // ..to SMALL EQUALS SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe68; // from SMALL REVERSE SOLIDUS
|
||
|
while (c < 0xfe6c) // ..to SMALL COMMERCIAL AT
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM
|
||
|
while (c < 0xfe75) // ..to ARABIC KASRATAN ISOLATED FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe76; // from ARABIC FATHA ISOLATED FORM
|
||
|
while (c < 0xfefd) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xff01; // from FULLWIDTH EXCLAMATION MARK
|
||
|
while (c < 0xffbf) // ..to HALFWIDTH HANGUL LETTER HIEUH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffc2; // from HALFWIDTH HANGUL LETTER A
|
||
|
while (c < 0xffc8) // ..to HALFWIDTH HANGUL LETTER E
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO
|
||
|
while (c < 0xffd0) // ..to HALFWIDTH HANGUL LETTER OE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO
|
||
|
while (c < 0xffd8) // ..to HALFWIDTH HANGUL LETTER YU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU
|
||
|
charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I
|
||
|
c = 0xffe0; // from FULLWIDTH CENT SIGN
|
||
|
while (c < 0xffe7) // ..to FULLWIDTH WON SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL
|
||
|
while (c < 0xffef) // ..to HALFWIDTH WHITE CIRCLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR
|
||
|
while (c < 0xfffe) // ..to REPLACEMENT CHARACTER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10000; // from LINEAR B SYLLABLE B008 A
|
||
|
while (c < 0x1000c) // ..to LINEAR B SYLLABLE B046 JE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1000d; // from LINEAR B SYLLABLE B036 JO
|
||
|
while (c < 0x10027) // ..to LINEAR B SYLLABLE B032 QO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10028; // from LINEAR B SYLLABLE B060 RA
|
||
|
while (c < 0x1003b) // ..to LINEAR B SYLLABLE B042 WO
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1003c; // LINEAR B SYLLABLE B017 ZA
|
||
|
charset[i++] = 0x1003d; // LINEAR B SYLLABLE B074 ZE
|
||
|
c = 0x1003f; // from LINEAR B SYLLABLE B020 ZO
|
||
|
while (c < 0x1004e) // ..to LINEAR B SYLLABLE B091 TWO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10050; // from LINEAR B SYMBOL B018
|
||
|
while (c < 0x1005e) // ..to LINEAR B SYMBOL B089
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10080; // from LINEAR B IDEOGRAM B100 MAN
|
||
|
while (c < 0x100fb) // ..to LINEAR B IDEOGRAM VESSEL B305
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x10100; // AEGEAN WORD SEPARATOR LINE
|
||
|
charset[i++] = 0x10102; // AEGEAN CHECK MARK
|
||
|
c = 0x10107; // from AEGEAN NUMBER ONE
|
||
|
while (c < 0x10134) // ..to AEGEAN NUMBER NINETY THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10137; // from AEGEAN WEIGHT BASE UNIT
|
||
|
while (c < 0x1018b) // ..to GREEK ZERO SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10190; // from ROMAN SEXTANS SIGN
|
||
|
while (c < 0x1019c) // ..to ROMAN CENTURIAL SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x101d0; // from PHAISTOS DISC SIGN PEDESTRIAN
|
||
|
while (c < 0x101fe) // ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10280; // from LYCIAN LETTER A
|
||
|
while (c < 0x1029d) // ..to LYCIAN LETTER X
|
||
|
charset[i++] = c++;
|
||
|
c = 0x102a0; // from CARIAN LETTER A
|
||
|
while (c < 0x102d1) // ..to CARIAN LETTER UUU3
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10300; // from OLD ITALIC LETTER A
|
||
|
while (c < 0x1031f) // ..to OLD ITALIC LETTER UU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10320; // from OLD ITALIC NUMERAL ONE
|
||
|
while (c < 0x10324) // ..to OLD ITALIC NUMERAL FIFTY
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10330; // from GOTHIC LETTER AHSA
|
||
|
while (c < 0x1034b) // ..to GOTHIC LETTER NINE HUNDRED
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10380; // from UGARITIC LETTER ALPA
|
||
|
while (c < 0x1039e) // ..to UGARITIC LETTER SSU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1039f; // from UGARITIC WORD DIVIDER
|
||
|
while (c < 0x103c4) // ..to OLD PERSIAN SIGN HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x103c8; // from OLD PERSIAN SIGN AURAMAZDAA
|
||
|
while (c < 0x103d6) // ..to OLD PERSIAN NUMBER HUNDRED
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10400; // from DESERET CAPITAL LETTER LONG I
|
||
|
while (c < 0x1049e) // ..to OSMANYA LETTER OO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x104a0; // from OSMANYA DIGIT ZERO
|
||
|
while (c < 0x104aa) // ..to OSMANYA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10800; // from CYPRIOT SYLLABLE A
|
||
|
while (c < 0x10806) // ..to CYPRIOT SYLLABLE JA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1080a; // from CYPRIOT SYLLABLE KA
|
||
|
while (c < 0x10836) // ..to CYPRIOT SYLLABLE WO
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x10837; // CYPRIOT SYLLABLE XA
|
||
|
charset[i++] = 0x10838; // CYPRIOT SYLLABLE XE
|
||
|
c = 0x1083f; // from CYPRIOT SYLLABLE ZO
|
||
|
while (c < 0x10856) // ..to IMPERIAL ARAMAIC LETTER TAW
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10857; // from IMPERIAL ARAMAIC SECTION SIGN
|
||
|
while (c < 0x10860) // ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10900; // from PHOENICIAN LETTER ALF
|
||
|
while (c < 0x1091c) // ..to PHOENICIAN NUMBER THREE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1091f; // from PHOENICIAN WORD SEPARATOR
|
||
|
while (c < 0x1093a) // ..to LYDIAN LETTER C
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10a00; // from KHAROSHTHI LETTER A
|
||
|
while (c < 0x10a04) // ..to KHAROSHTHI VOWEL SIGN VOCALIC R
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x10a05; // KHAROSHTHI VOWEL SIGN E
|
||
|
charset[i++] = 0x10a06; // KHAROSHTHI VOWEL SIGN O
|
||
|
c = 0x10a0c; // from KHAROSHTHI VOWEL LENGTH MARK
|
||
|
while (c < 0x10a14) // ..to KHAROSHTHI LETTER GHA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x10a15; // KHAROSHTHI LETTER CA
|
||
|
charset[i++] = 0x10a17; // KHAROSHTHI LETTER JA
|
||
|
c = 0x10a19; // from KHAROSHTHI LETTER NYA
|
||
|
while (c < 0x10a34) // ..to KHAROSHTHI LETTER TTTHA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x10a38; // KHAROSHTHI SIGN BAR ABOVE
|
||
|
charset[i++] = 0x10a3a; // KHAROSHTHI SIGN DOT BELOW
|
||
|
c = 0x10a3f; // from KHAROSHTHI VIRAMA
|
||
|
while (c < 0x10a48) // ..to KHAROSHTHI NUMBER ONE THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10a50; // from KHAROSHTHI PUNCTUATION DOT
|
||
|
while (c < 0x10a59) // ..to KHAROSHTHI PUNCTUATION LINES
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10a60; // from OLD SOUTH ARABIAN LETTER HE
|
||
|
while (c < 0x10a80) // ..to OLD SOUTH ARABIAN NUMERIC INDICATOR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10b00; // from AVESTAN LETTER A
|
||
|
while (c < 0x10b36) // ..to AVESTAN LETTER HE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10b39; // from AVESTAN ABBREVIATION MARK
|
||
|
while (c < 0x10b56) // ..to INSCRIPTIONAL PARTHIAN LETTER TAW
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10b58; // from INSCRIPTIONAL PARTHIAN NUMBER ONE
|
||
|
while (c < 0x10b73) // ..to INSCRIPTIONAL PAHLAVI LETTER TAW
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10b78; // from INSCRIPTIONAL PAHLAVI NUMBER ONE
|
||
|
while (c < 0x10b80) // ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10c00; // from OLD TURKIC LETTER ORKHON A
|
||
|
while (c < 0x10c49) // ..to OLD TURKIC LETTER ORKHON BASH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10e60; // from RUMI DIGIT ONE
|
||
|
while (c < 0x10e7f) // ..to RUMI FRACTION TWO THIRDS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x11000; // from BRAHMI SIGN CANDRABINDU
|
||
|
while (c < 0x1104e) // ..to BRAHMI PUNCTUATION LOTUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x11052; // from BRAHMI NUMBER ONE
|
||
|
while (c < 0x11070) // ..to BRAHMI DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x11080; // from KAITHI SIGN CANDRABINDU
|
||
|
while (c < 0x110c2) // ..to KAITHI DOUBLE DANDA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12000; // from CUNEIFORM SIGN A
|
||
|
while (c < 0x1236f) // ..to CUNEIFORM SIGN ZUM
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12400; // from CUNEIFORM NUMERIC SIGN TWO ASH
|
||
|
while (c < 0x12463) // ..to CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12470; // from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER
|
||
|
while (c < 0x12474) // ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
|
||
|
charset[i++] = c++;
|
||
|
c = 0x13000; // from EGYPTIAN HIEROGLYPH A001
|
||
|
while (c < 0x1342f) // ..to EGYPTIAN HIEROGLYPH AA032
|
||
|
charset[i++] = c++;
|
||
|
c = 0x16800; // from BAMUM LETTER PHASE-A NGKUE MFON
|
||
|
while (c < 0x16a39) // ..to BAMUM LETTER PHASE-F VUEQ
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1b000; // KATAKANA LETTER ARCHAIC E
|
||
|
charset[i++] = 0x1b001; // HIRAGANA LETTER ARCHAIC YE
|
||
|
c = 0x1d000; // from BYZANTINE MUSICAL SYMBOL PSILI
|
||
|
while (c < 0x1d0f6) // ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d100; // from MUSICAL SYMBOL SINGLE BARLINE
|
||
|
while (c < 0x1d127) // ..to MUSICAL SYMBOL DRUM CLEF-2
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d129; // from MUSICAL SYMBOL MULTIPLE MEASURE REST
|
||
|
while (c < 0x1d1de) // ..to MUSICAL SYMBOL PES SUBPUNCTIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d200; // from GREEK VOCAL NOTATION SYMBOL-1
|
||
|
while (c < 0x1d246) // ..to GREEK MUSICAL LEIMMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d300; // from MONOGRAM FOR EARTH
|
||
|
while (c < 0x1d357) // ..to TETRAGRAM FOR FOSTERING
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d360; // from COUNTING ROD UNIT DIGIT ONE
|
||
|
while (c < 0x1d372) // ..to COUNTING ROD TENS DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d400; // from MATHEMATICAL BOLD CAPITAL A
|
||
|
while (c < 0x1d455) // ..to MATHEMATICAL ITALIC SMALL G
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d456; // from MATHEMATICAL ITALIC SMALL I
|
||
|
while (c < 0x1d49d) // ..to MATHEMATICAL SCRIPT CAPITAL A
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1d49e; // MATHEMATICAL SCRIPT CAPITAL C
|
||
|
charset[i++] = 0x1d49f; // MATHEMATICAL SCRIPT CAPITAL D
|
||
|
charset[i++] = 0x1d4a5; // MATHEMATICAL SCRIPT CAPITAL J
|
||
|
charset[i++] = 0x1d4a6; // MATHEMATICAL SCRIPT CAPITAL K
|
||
|
c = 0x1d4a9; // from MATHEMATICAL SCRIPT CAPITAL N
|
||
|
while (c < 0x1d4ad) // ..to MATHEMATICAL SCRIPT CAPITAL Q
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d4ae; // from MATHEMATICAL SCRIPT CAPITAL S
|
||
|
while (c < 0x1d4ba) // ..to MATHEMATICAL SCRIPT SMALL D
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d4bd; // from MATHEMATICAL SCRIPT SMALL H
|
||
|
while (c < 0x1d4c4) // ..to MATHEMATICAL SCRIPT SMALL N
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d4c5; // from MATHEMATICAL SCRIPT SMALL P
|
||
|
while (c < 0x1d506) // ..to MATHEMATICAL FRAKTUR CAPITAL B
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d507; // from MATHEMATICAL FRAKTUR CAPITAL D
|
||
|
while (c < 0x1d50b) // ..to MATHEMATICAL FRAKTUR CAPITAL G
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d50d; // from MATHEMATICAL FRAKTUR CAPITAL J
|
||
|
while (c < 0x1d515) // ..to MATHEMATICAL FRAKTUR CAPITAL Q
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d516; // from MATHEMATICAL FRAKTUR CAPITAL S
|
||
|
while (c < 0x1d51d) // ..to MATHEMATICAL FRAKTUR CAPITAL Y
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d51e; // from MATHEMATICAL FRAKTUR SMALL A
|
||
|
while (c < 0x1d53a) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d53b; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL D
|
||
|
while (c < 0x1d53f) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d540; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL I
|
||
|
while (c < 0x1d545) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d54a; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL S
|
||
|
while (c < 0x1d551) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d552; // from MATHEMATICAL DOUBLE-STRUCK SMALL A
|
||
|
while (c < 0x1d6a6) // ..to MATHEMATICAL ITALIC SMALL DOTLESS J
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d6a8; // from MATHEMATICAL BOLD CAPITAL ALPHA
|
||
|
while (c < 0x1d7cc) // ..to MATHEMATICAL BOLD SMALL DIGAMMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d7ce; // from MATHEMATICAL BOLD DIGIT ZERO
|
||
|
while (c < 0x1d800) // ..to MATHEMATICAL MONOSPACE DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f000; // from MAHJONG TILE EAST WIND
|
||
|
while (c < 0x1f02c) // ..to MAHJONG TILE BACK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f030; // from DOMINO TILE HORIZONTAL BACK
|
||
|
while (c < 0x1f094) // ..to DOMINO TILE VERTICAL-06-06
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f0a0; // from PLAYING CARD BACK
|
||
|
while (c < 0x1f0af) // ..to PLAYING CARD KING OF SPADES
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f0b1; // from PLAYING CARD ACE OF HEARTS
|
||
|
while (c < 0x1f0bf) // ..to PLAYING CARD KING OF HEARTS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f0c1; // from PLAYING CARD ACE OF DIAMONDS
|
||
|
while (c < 0x1f0d0) // ..to PLAYING CARD BLACK JOKER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f0d1; // from PLAYING CARD ACE OF CLUBS
|
||
|
while (c < 0x1f0e0) // ..to PLAYING CARD WHITE JOKER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f100; // from DIGIT ZERO FULL STOP
|
||
|
while (c < 0x1f10b) // ..to DIGIT NINE COMMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f110; // from PARENTHESIZED LATIN CAPITAL LETTER A
|
||
|
while (c < 0x1f12f) // ..to CIRCLED WZ
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f130; // from SQUARED LATIN CAPITAL LETTER A
|
||
|
while (c < 0x1f16a) // ..to NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f170; // from NEGATIVE SQUARED LATIN CAPITAL LETTER A
|
||
|
while (c < 0x1f19b) // ..to SQUARED VS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f1e6; // from REGIONAL INDICATOR SYMBOL LETTER A
|
||
|
while (c < 0x1f203) // ..to SQUARED KATAKANA SA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f210; // from SQUARED CJK UNIFIED IDEOGRAPH-624B
|
||
|
while (c < 0x1f23b) // ..to SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f240; // from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C
|
||
|
while (c < 0x1f249) // ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1f250; // CIRCLED IDEOGRAPH ADVANTAGE
|
||
|
charset[i++] = 0x1f251; // CIRCLED IDEOGRAPH ACCEPT
|
||
|
c = 0x1f300; // from CYCLONE
|
||
|
while (c < 0x1f321) // ..to SHOOTING STAR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f330; // from CHESTNUT
|
||
|
while (c < 0x1f336) // ..to CACTUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f337; // from TULIP
|
||
|
while (c < 0x1f37d) // ..to BABY BOTTLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f380; // from RIBBON
|
||
|
while (c < 0x1f394) // ..to GRADUATION CAP
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f3a0; // from CAROUSEL HORSE
|
||
|
while (c < 0x1f3c5) // ..to SURFER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f3c6; // from TROPHY
|
||
|
while (c < 0x1f3cb) // ..to SWIMMER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f3e0; // from HOUSE BUILDING
|
||
|
while (c < 0x1f3f1) // ..to EUROPEAN CASTLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f400; // from RAT
|
||
|
while (c < 0x1f43f) // ..to PAW PRINTS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f442; // from EAR
|
||
|
while (c < 0x1f4f8) // ..to CAMERA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f4f9; // from VIDEO CAMERA
|
||
|
while (c < 0x1f4fd) // ..to VIDEOCASSETTE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f500; // from TWISTED RIGHTWARDS ARROWS
|
||
|
while (c < 0x1f53e) // ..to DOWN-POINTING SMALL RED TRIANGLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f550; // from CLOCK FACE ONE OCLOCK
|
||
|
while (c < 0x1f568) // ..to CLOCK FACE TWELVE-THIRTY
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f5fb; // from MOUNT FUJI
|
||
|
while (c < 0x1f600) // ..to MOYAI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f601; // from GRINNING FACE WITH SMILING EYES
|
||
|
while (c < 0x1f611) // ..to NEUTRAL FACE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1f612; // UNAMUSED FACE
|
||
|
charset[i++] = 0x1f614; // PENSIVE FACE
|
||
|
charset[i++] = 0x1f61c; // FACE WITH STUCK-OUT TONGUE AND WINKING EYE
|
||
|
charset[i++] = 0x1f61e; // DISAPPOINTED FACE
|
||
|
c = 0x1f620; // from ANGRY FACE
|
||
|
while (c < 0x1f626) // ..to DISAPPOINTED BUT RELIEVED FACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f628; // from FEARFUL FACE
|
||
|
while (c < 0x1f62c) // ..to TIRED FACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f630; // from FACE WITH OPEN MOUTH AND COLD SWEAT
|
||
|
while (c < 0x1f634) // ..to FLUSHED FACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f635; // from DIZZY FACE
|
||
|
while (c < 0x1f641) // ..to WEARY CAT FACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f645; // from FACE WITH NO GOOD GESTURE
|
||
|
while (c < 0x1f650) // ..to PERSON WITH FOLDED HANDS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f680; // from ROCKET
|
||
|
while (c < 0x1f6c6) // ..to LEFT LUGGAGE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f700; // from ALCHEMICAL SYMBOL FOR QUINTESSENCE
|
||
|
while (c < 0x1f774) // ..to ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x20000; // from <CJK Ideograph Extension B, First>
|
||
|
while (c < 0x2a6d7) // ..to <CJK Ideograph Extension B, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2a700; // from <CJK Ideograph Extension C, First>
|
||
|
while (c < 0x2b735) // ..to <CJK Ideograph Extension C, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2b740; // from <CJK Ideograph Extension D, First>
|
||
|
while (c < 0x2b81e) // ..to <CJK Ideograph Extension D, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2f800; // from CJK COMPATIBILITY IDEOGRAPH-2F800
|
||
|
while (c < 0x2fa1e) // ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||
|
charset[i++] = c++;
|
||
|
c = 0xe0020; // from TAG SPACE
|
||
|
while (c < 0xe0080) // ..to CANCEL TAG
|
||
|
charset[i++] = c++;
|
||
|
|
||
|
/* Zero-terminate it, and cache the first character */
|
||
|
charset[i] = 0;
|
||
|
c0 = charset[0];
|
||
|
|
||
|
last = minlength - 1;
|
||
|
i = 0;
|
||
|
while (i <= last) {
|
||
|
id[i] = 0;
|
||
|
utf32[i++] = c0;
|
||
|
}
|
||
|
lastid = -1;
|
||
|
utf32[i] = 0;
|
||
|
|
||
|
/* We must init word with dummy data, it doesn't get set until filter() */
|
||
|
word = 1;
|
||
|
}
|
||
|
|
||
|
void generate()
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
/* Handle the typical case specially */
|
||
|
if (utf32[last] = charset[++lastid]) return;
|
||
|
|
||
|
lastid = 0;
|
||
|
utf32[i = last] = c0;
|
||
|
while (i--) { // Have a preceding position?
|
||
|
if (utf32[i] = charset[++id[i]]) return;
|
||
|
id[i] = 0;
|
||
|
utf32[i] = c0;
|
||
|
}
|
||
|
|
||
|
if (++last < maxlength) { // Next length?
|
||
|
id[last] = lastid = 0;
|
||
|
utf32[last] = c0;
|
||
|
utf32[last + 1] = 0;
|
||
|
} else // We're done
|
||
|
utf32 = 0;
|
||
|
}
|
||
|
|
||
|
void restore()
|
||
|
{
|
||
|
int i, o, c;
|
||
|
|
||
|
/* Convert the restored word back from UTF-8 to UTF-32 */
|
||
|
i = o = 0;
|
||
|
while (c = word[i]) {
|
||
|
if (c >= 0xf0) {
|
||
|
c = (c << 6) + word[++i];
|
||
|
c = (c << 6) + word[++i];
|
||
|
c = (c << 6) + word[++i];
|
||
|
c -= 0x3C82080;
|
||
|
} else if (c >= 0xe0) {
|
||
|
c = (c << 6) + word[++i];
|
||
|
c = (c << 6) + word[++i];
|
||
|
c -= 0xE2080;
|
||
|
} else if (c >= 0xc0) {
|
||
|
c = (c << 6) + word[++i];
|
||
|
c -= 0x3080;
|
||
|
}
|
||
|
i++;
|
||
|
utf32[o++] = c;
|
||
|
}
|
||
|
utf32[o] = 0;
|
||
|
|
||
|
/* Calculate the current length and infer the character indices */
|
||
|
last = 0;
|
||
|
while (c = utf32[last]) {
|
||
|
i = 0; while (charset[i] != c && charset[i]) i++;
|
||
|
if (!charset[i]) i = 0; // Not found
|
||
|
id[last++] = i;
|
||
|
}
|
||
|
lastid = id[--last];
|
||
|
}
|
||
|
|
||
|
/* Convert from UTF-32 to UTF-8 */
|
||
|
void filter()
|
||
|
{
|
||
|
int i, c;
|
||
|
i = -1; c = 0;
|
||
|
|
||
|
while (utf32[++i]) {
|
||
|
if (utf32[i] >= 0x10000) {
|
||
|
word[c++] = 0xf0 | (utf32[i]>>18);
|
||
|
word[c++] = 0x80 | (utf32[i]>>12 & 0x3f);
|
||
|
word[c++] = 0x80 | (utf32[i]>>6 & 0x3f);
|
||
|
word[c++] = 0x80 | (utf32[i] & 0x3f);
|
||
|
}
|
||
|
else if (utf32[i] >= 0x0800) {
|
||
|
word[c++] = 0xe0 | (utf32[i]>>12);
|
||
|
word[c++] = 0x80 | (utf32[i]>>6 & 0x3f);
|
||
|
word[c++] = 0x80 | (utf32[i] & 0x3f);
|
||
|
}
|
||
|
else if (utf32[i] >= 0x80) {
|
||
|
word[c++] = 0xc0 | (utf32[i]>>6);
|
||
|
word[c++] = 0x80 | (utf32[i] & 0x3f);
|
||
|
}
|
||
|
else {
|
||
|
word[c++] = utf32[i];
|
||
|
}
|
||
|
}
|
||
|
word[c] = 0;
|
||
|
}
|