1079 lines
41 KiB
Plaintext
1079 lines
41 KiB
Plaintext
|
# This software is Copyright (c) 2012 magnum, and it is hereby
|
||
|
# released to the general public under the following terms:
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted.
|
||
|
#
|
||
|
# Generic implementation of "dumb" exhaustive search of Unicode/UCS-2 and
|
||
|
# an arbitrary charset. Default is to try *all* allocated characters (there are
|
||
|
# 54473 of them). Even if a fast format can exhaust two characters in 15
|
||
|
# minutes, three characters would take 1.5 years...
|
||
|
#
|
||
|
# The output is UTF-8, so for 16-bit formats you need to give --enc=utf8
|
||
|
[List.External:Dumb16]
|
||
|
int maxlength; // Maximum password length to try; assigned in init()
|
||
|
int last; // Last character position, zero-based
|
||
|
int lastid; // Character index in the last position
|
||
|
int id[0x7f]; // Current character indices for other positions (0x7f slots)
|
||
|
int charset[0x10000], c0; // charset: code points to try, appended range by range in init(); c0: use not visible here - presumably the first charset entry, TODO confirm
|
||
|
int ucs2[0x7F]; // Word in UCS-2 (0x7F slots; presumably one code unit per element - confirm)
|
||
|
|
||
|
void init()
|
||
|
{
|
||
|
int minlength;
|
||
|
int i, c;
|
||
|
|
||
|
minlength = 1; // Initial password length to try, must be at least 1
|
||
|
maxlength = 2; // Must be at least same as minlength
|
||
|
|
||
|
/*
|
||
|
* This defines the character set. This is auto-generated from UnicodeData.txt
|
||
|
* and we skip control characters.
|
||
|
*/
|
||
|
i = 0;
|
||
|
c = 0x20; // from SPACE
|
||
|
while (c < 0x7f) // ..to TILDE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa0; // from NO-BREAK SPACE
|
||
|
while (c < 0x378) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x37a; // from GREEK YPOGEGRAMMENI
|
||
|
while (c < 0x37f) // ..to GREEK QUESTION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x384; // from GREEK TONOS
|
||
|
while (c < 0x38b) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||
|
while (c < 0x3a2) // ..to GREEK CAPITAL LETTER RHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
|
||
|
while (c < 0x528) // ..to CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||
|
charset[i++] = c++;
|
||
|
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
|
||
|
while (c < 0x557) // ..to ARMENIAN CAPITAL LETTER FEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||
|
while (c < 0x560) // ..to ARMENIAN ABBREVIATION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x561; // from ARMENIAN SMALL LETTER AYB
|
||
|
while (c < 0x588) // ..to ARMENIAN SMALL LIGATURE ECH YIWN
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x589; // ARMENIAN FULL STOP
|
||
|
charset[i++] = 0x58a; // ARMENIAN HYPHEN
|
||
|
c = 0x591; // from HEBREW ACCENT ETNAHTA
|
||
|
while (c < 0x5c8) // ..to HEBREW POINT QAMATS QATAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x5d0; // from HEBREW LETTER ALEF
|
||
|
while (c < 0x5eb) // ..to HEBREW LETTER TAV
|
||
|
charset[i++] = c++;
|
||
|
c = 0x5f0; // from HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||
|
while (c < 0x5f5) // ..to HEBREW PUNCTUATION GERSHAYIM
|
||
|
charset[i++] = c++;
|
||
|
c = 0x600; // from ARABIC NUMBER SIGN
|
||
|
while (c < 0x604) // ..to ARABIC SIGN SAFHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x606; // from ARABIC-INDIC CUBE ROOT
|
||
|
while (c < 0x61c) // ..to ARABIC SEMICOLON
|
||
|
charset[i++] = c++;
|
||
|
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
|
||
|
while (c < 0x70e) // ..to SYRIAC HARKLEAN ASTERISCUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x70f; // from SYRIAC ABBREVIATION MARK
|
||
|
while (c < 0x74b) // ..to SYRIAC BARREKH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x74d; // from SYRIAC LETTER SOGDIAN ZHAIN
|
||
|
while (c < 0x7b2) // ..to THAANA LETTER NAA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x7c0; // from NKO DIGIT ZERO
|
||
|
while (c < 0x7fb) // ..to NKO LAJANYALAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x800; // from SAMARITAN LETTER ALAF
|
||
|
while (c < 0x82e) // ..to SAMARITAN MARK NEQUDAA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
|
||
|
while (c < 0x83f) // ..to SAMARITAN PUNCTUATION ANNAAU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x840; // from MANDAIC LETTER HALQA
|
||
|
while (c < 0x85c) // ..to MANDAIC GEMINATION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
|
||
|
while (c < 0x978) // ..to DEVANAGARI LETTER UUE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x979; // from DEVANAGARI LETTER ZHA
|
||
|
while (c < 0x980) // ..to DEVANAGARI LETTER BBA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x981; // BENGALI SIGN CANDRABINDU
|
||
|
charset[i++] = 0x983; // BENGALI SIGN VISARGA
|
||
|
c = 0x985; // from BENGALI LETTER A
|
||
|
while (c < 0x98d) // ..to BENGALI LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x98f; // BENGALI LETTER E
|
||
|
charset[i++] = 0x990; // BENGALI LETTER AI
|
||
|
c = 0x993; // from BENGALI LETTER O
|
||
|
while (c < 0x9a9) // ..to BENGALI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9aa; // from BENGALI LETTER PA
|
||
|
while (c < 0x9b1) // ..to BENGALI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9b6; // from BENGALI LETTER SHA
|
||
|
while (c < 0x9ba) // ..to BENGALI LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9bc; // from BENGALI SIGN NUKTA
|
||
|
while (c < 0x9c5) // ..to BENGALI VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
|
||
|
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
|
||
|
c = 0x9cb; // from BENGALI VOWEL SIGN O
|
||
|
while (c < 0x9cf) // ..to BENGALI LETTER KHANDA TA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x9dc; // BENGALI LETTER RRA
|
||
|
charset[i++] = 0x9dd; // BENGALI LETTER RHA
|
||
|
c = 0x9df; // from BENGALI LETTER YYA
|
||
|
while (c < 0x9e4) // ..to BENGALI VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x9e6; // from BENGALI DIGIT ZERO
|
||
|
while (c < 0x9fc) // ..to BENGALI GANDA MARK
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
|
||
|
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
|
||
|
c = 0xa05; // from GURMUKHI LETTER A
|
||
|
while (c < 0xa0b) // ..to GURMUKHI LETTER UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
|
||
|
charset[i++] = 0xa10; // GURMUKHI LETTER AI
|
||
|
c = 0xa13; // from GURMUKHI LETTER OO
|
||
|
while (c < 0xa29) // ..to GURMUKHI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa2a; // from GURMUKHI LETTER PA
|
||
|
while (c < 0xa31) // ..to GURMUKHI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa32; // GURMUKHI LETTER LA
|
||
|
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
|
||
|
charset[i++] = 0xa35; // GURMUKHI LETTER VA
|
||
|
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
|
||
|
charset[i++] = 0xa38; // GURMUKHI LETTER SA
|
||
|
charset[i++] = 0xa39; // GURMUKHI LETTER HA
|
||
|
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
|
||
|
while (c < 0xa43) // ..to GURMUKHI VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
|
||
|
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
|
||
|
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
|
||
|
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
|
||
|
c = 0xa59; // from GURMUKHI LETTER KHHA
|
||
|
while (c < 0xa5d) // ..to GURMUKHI LETTER RRA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa66; // from GURMUKHI DIGIT ZERO
|
||
|
while (c < 0xa76) // ..to GURMUKHI SIGN YAKASH
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
|
||
|
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
|
||
|
c = 0xa85; // from GUJARATI LETTER A
|
||
|
while (c < 0xa8e) // ..to GUJARATI VOWEL CANDRA E
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa8f; // GUJARATI LETTER E
|
||
|
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
|
||
|
c = 0xa93; // from GUJARATI LETTER O
|
||
|
while (c < 0xaa9) // ..to GUJARATI LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaaa; // from GUJARATI LETTER PA
|
||
|
while (c < 0xab1) // ..to GUJARATI LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xab2; // GUJARATI LETTER LA
|
||
|
charset[i++] = 0xab3; // GUJARATI LETTER LLA
|
||
|
c = 0xab5; // from GUJARATI LETTER VA
|
||
|
while (c < 0xaba) // ..to GUJARATI LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabc; // from GUJARATI SIGN NUKTA
|
||
|
while (c < 0xac6) // ..to GUJARATI VOWEL SIGN CANDRA E
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
|
||
|
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
|
||
|
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
|
||
|
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
|
||
|
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
|
||
|
while (c < 0xae4) // ..to GUJARATI VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xae6; // from GUJARATI DIGIT ZERO
|
||
|
while (c < 0xaf0) // ..to GUJARATI DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
|
||
|
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
|
||
|
c = 0xb05; // from ORIYA LETTER A
|
||
|
while (c < 0xb0d) // ..to ORIYA LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb0f; // ORIYA LETTER E
|
||
|
charset[i++] = 0xb10; // ORIYA LETTER AI
|
||
|
c = 0xb13; // from ORIYA LETTER O
|
||
|
while (c < 0xb29) // ..to ORIYA LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb2a; // from ORIYA LETTER PA
|
||
|
while (c < 0xb31) // ..to ORIYA LETTER RA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb32; // ORIYA LETTER LA
|
||
|
charset[i++] = 0xb33; // ORIYA LETTER LLA
|
||
|
c = 0xb35; // from ORIYA LETTER VA
|
||
|
while (c < 0xb3a) // ..to ORIYA LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb3c; // from ORIYA SIGN NUKTA
|
||
|
while (c < 0xb45) // ..to ORIYA VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
|
||
|
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
|
||
|
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
|
||
|
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
|
||
|
charset[i++] = 0xb56; // ORIYA AI LENGTH MARK
|
||
|
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
|
||
|
charset[i++] = 0xb5c; // ORIYA LETTER RRA
|
||
|
charset[i++] = 0xb5d; // ORIYA LETTER RHA
|
||
|
c = 0xb5f; // from ORIYA LETTER YYA
|
||
|
while (c < 0xb64) // ..to ORIYA VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xb66; // from ORIYA DIGIT ZERO
|
||
|
while (c < 0xb78) // ..to ORIYA FRACTION THREE SIXTEENTHS
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
|
||
|
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
|
||
|
c = 0xb85; // from TAMIL LETTER A
|
||
|
while (c < 0xb8b) // ..to TAMIL LETTER UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb8e; // TAMIL LETTER E
|
||
|
charset[i++] = 0xb90; // TAMIL LETTER AI
|
||
|
c = 0xb92; // from TAMIL LETTER O
|
||
|
while (c < 0xb96) // ..to TAMIL LETTER KA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xb99; // TAMIL LETTER NGA
|
||
|
charset[i++] = 0xb9a; // TAMIL LETTER CA
|
||
|
charset[i++] = 0xb9e; // TAMIL LETTER NYA
|
||
|
charset[i++] = 0xb9f; // TAMIL LETTER TTA
|
||
|
charset[i++] = 0xba3; // TAMIL LETTER NNA
|
||
|
charset[i++] = 0xba4; // TAMIL LETTER TA
|
||
|
charset[i++] = 0xba8; // TAMIL LETTER NA
|
||
|
charset[i++] = 0xbaa; // TAMIL LETTER PA
|
||
|
c = 0xbae; // from TAMIL LETTER MA
|
||
|
while (c < 0xbba) // ..to TAMIL LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xbbe; // from TAMIL VOWEL SIGN AA
|
||
|
while (c < 0xbc3) // ..to TAMIL VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
|
||
|
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
|
||
|
c = 0xbca; // from TAMIL VOWEL SIGN O
|
||
|
while (c < 0xbce) // ..to TAMIL SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xbe6; // from TAMIL DIGIT ZERO
|
||
|
while (c < 0xbfb) // ..to TAMIL NUMBER SIGN
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc01; // TELUGU SIGN CANDRABINDU
|
||
|
charset[i++] = 0xc03; // TELUGU SIGN VISARGA
|
||
|
c = 0xc05; // from TELUGU LETTER A
|
||
|
while (c < 0xc0d) // ..to TELUGU LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc0e; // TELUGU LETTER E
|
||
|
charset[i++] = 0xc10; // TELUGU LETTER AI
|
||
|
c = 0xc12; // from TELUGU LETTER O
|
||
|
while (c < 0xc29) // ..to TELUGU LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc2a; // from TELUGU LETTER PA
|
||
|
while (c < 0xc34) // ..to TELUGU LETTER LLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc35; // from TELUGU LETTER VA
|
||
|
while (c < 0xc3a) // ..to TELUGU LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
|
||
|
while (c < 0xc45) // ..to TELUGU VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
|
||
|
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
|
||
|
c = 0xc4a; // from TELUGU VOWEL SIGN O
|
||
|
while (c < 0xc4e) // ..to TELUGU SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc55; // TELUGU LENGTH MARK
|
||
|
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
|
||
|
charset[i++] = 0xc58; // TELUGU LETTER TSA
|
||
|
charset[i++] = 0xc59; // TELUGU LETTER DZA
|
||
|
c = 0xc60; // from TELUGU LETTER VOCALIC RR
|
||
|
while (c < 0xc64) // ..to TELUGU VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc66; // from TELUGU DIGIT ZERO
|
||
|
while (c < 0xc70) // ..to TELUGU DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xc78; // from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR
|
||
|
while (c < 0xc80) // ..to TELUGU SIGN TUUMU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc82; // KANNADA SIGN ANUSVARA
|
||
|
charset[i++] = 0xc83; // KANNADA SIGN VISARGA
|
||
|
c = 0xc85; // from KANNADA LETTER A
|
||
|
while (c < 0xc8d) // ..to KANNADA LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xc8e; // KANNADA LETTER E
|
||
|
charset[i++] = 0xc90; // KANNADA LETTER AI
|
||
|
c = 0xc92; // from KANNADA LETTER O
|
||
|
while (c < 0xca9) // ..to KANNADA LETTER NA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcaa; // from KANNADA LETTER PA
|
||
|
while (c < 0xcb4) // ..to KANNADA LETTER LLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcb5; // from KANNADA LETTER VA
|
||
|
while (c < 0xcba) // ..to KANNADA LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xcbc; // from KANNADA SIGN NUKTA
|
||
|
while (c < 0xcc5) // ..to KANNADA VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
|
||
|
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
|
||
|
c = 0xcca; // from KANNADA VOWEL SIGN O
|
||
|
while (c < 0xcce) // ..to KANNADA SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
|
||
|
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
|
||
|
c = 0xce0; // from KANNADA LETTER VOCALIC RR
|
||
|
while (c < 0xce4) // ..to KANNADA VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xce6; // from KANNADA DIGIT ZERO
|
||
|
while (c < 0xcf0) // ..to KANNADA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
|
||
|
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
|
||
|
charset[i++] = 0xd02; // MALAYALAM SIGN ANUSVARA
|
||
|
charset[i++] = 0xd03; // MALAYALAM SIGN VISARGA
|
||
|
c = 0xd05; // from MALAYALAM LETTER A
|
||
|
while (c < 0xd0d) // ..to MALAYALAM LETTER VOCALIC L
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd0e; // MALAYALAM LETTER E
|
||
|
charset[i++] = 0xd10; // MALAYALAM LETTER AI
|
||
|
c = 0xd12; // from MALAYALAM LETTER O
|
||
|
while (c < 0xd3b) // ..to MALAYALAM LETTER TTTA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd3d; // from MALAYALAM SIGN AVAGRAHA
|
||
|
while (c < 0xd45) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
|
||
|
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
|
||
|
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
|
||
|
while (c < 0xd4f) // ..to MALAYALAM LETTER DOT REPH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd60; // from MALAYALAM LETTER VOCALIC RR
|
||
|
while (c < 0xd64) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd66; // from MALAYALAM DIGIT ZERO
|
||
|
while (c < 0xd76) // ..to MALAYALAM FRACTION THREE QUARTERS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd79; // from MALAYALAM DATE MARK
|
||
|
while (c < 0xd80) // ..to MALAYALAM LETTER CHILLU K
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA
|
||
|
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
|
||
|
c = 0xd85; // from SINHALA LETTER AYANNA
|
||
|
while (c < 0xd97) // ..to SINHALA LETTER AUYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
|
||
|
while (c < 0xdb2) // ..to SINHALA LETTER DANTAJA NAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
|
||
|
while (c < 0xdbc) // ..to SINHALA LETTER RAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdc0; // from SINHALA LETTER VAYANNA
|
||
|
while (c < 0xdc7) // ..to SINHALA LETTER FAYANNA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
|
||
|
while (c < 0xdd5) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
|
||
|
while (c < 0xde0) // ..to SINHALA VOWEL SIGN GAYANUKITTA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
|
||
|
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
|
||
|
c = 0xe01; // from THAI CHARACTER KO KAI
|
||
|
while (c < 0xe3b) // ..to THAI CHARACTER PHINTHU
|
||
|
charset[i++] = c++;
|
||
|
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
|
||
|
while (c < 0xe5c) // ..to THAI CHARACTER KHOMUT
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xe81; // LAO LETTER KO
|
||
|
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
|
||
|
charset[i++] = 0xe87; // LAO LETTER NGO
|
||
|
charset[i++] = 0xe88; // LAO LETTER CO
|
||
|
c = 0xe94; // from LAO LETTER DO
|
||
|
while (c < 0xe98) // ..to LAO LETTER THO TAM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xe99; // from LAO LETTER NO
|
||
|
while (c < 0xea0) // ..to LAO LETTER FO SUNG
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xea1; // LAO LETTER MO
|
||
|
charset[i++] = 0xea3; // LAO LETTER LO LING
|
||
|
charset[i++] = 0xeaa; // LAO LETTER SO SUNG
|
||
|
charset[i++] = 0xeab; // LAO LETTER HO SUNG
|
||
|
c = 0xead; // from LAO LETTER O
|
||
|
while (c < 0xeba) // ..to LAO VOWEL SIGN UU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON
|
||
|
charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO
|
||
|
c = 0xec0; // from LAO VOWEL SIGN E
|
||
|
while (c < 0xec5) // ..to LAO VOWEL SIGN AI
|
||
|
charset[i++] = c++;
|
||
|
c = 0xec8; // from LAO TONE MAI EK
|
||
|
while (c < 0xece) // ..to LAO NIGGAHITA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xed0; // from LAO DIGIT ZERO
|
||
|
while (c < 0xeda) // ..to LAO DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xedc; // LAO HO NO
|
||
|
charset[i++] = 0xedd; // LAO HO MO
|
||
|
c = 0xf00; // from TIBETAN SYLLABLE OM
|
||
|
while (c < 0xf48) // ..to TIBETAN LETTER JA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf49; // from TIBETAN LETTER NYA
|
||
|
while (c < 0xf6d) // ..to TIBETAN LETTER RRA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf71; // from TIBETAN VOWEL SIGN AA
|
||
|
while (c < 0xf98) // ..to TIBETAN SUBJOINED LETTER JA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
|
||
|
while (c < 0xfbd) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfbe; // from TIBETAN KU RU KHA
|
||
|
while (c < 0xfcd) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
|
||
|
while (c < 0xfdb) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1000; // from MYANMAR LETTER KA
|
||
|
while (c < 0x10c6) // ..to GEORGIAN CAPITAL LETTER HOE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x10d0; // from GEORGIAN LETTER AN
|
||
|
while (c < 0x10fd) // ..to MODIFIER LETTER GEORGIAN NAR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
|
||
|
while (c < 0x1249) // ..to ETHIOPIC SYLLABLE QWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
|
||
|
while (c < 0x124e) // ..to ETHIOPIC SYLLABLE QWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
|
||
|
while (c < 0x1257) // ..to ETHIOPIC SYLLABLE QHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
|
||
|
while (c < 0x125e) // ..to ETHIOPIC SYLLABLE QHWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1260; // from ETHIOPIC SYLLABLE BA
|
||
|
while (c < 0x1289) // ..to ETHIOPIC SYLLABLE XWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
|
||
|
while (c < 0x128e) // ..to ETHIOPIC SYLLABLE XWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1290; // from ETHIOPIC SYLLABLE NA
|
||
|
while (c < 0x12b1) // ..to ETHIOPIC SYLLABLE KWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
|
||
|
while (c < 0x12b6) // ..to ETHIOPIC SYLLABLE KWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
|
||
|
while (c < 0x12bf) // ..to ETHIOPIC SYLLABLE KXO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
|
||
|
while (c < 0x12c6) // ..to ETHIOPIC SYLLABLE KXWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
|
||
|
while (c < 0x12d7) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
|
||
|
charset[i++] = c++;
|
||
|
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
|
||
|
while (c < 0x1311) // ..to ETHIOPIC SYLLABLE GWA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
|
||
|
while (c < 0x1316) // ..to ETHIOPIC SYLLABLE GWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
|
||
|
while (c < 0x135b) // ..to ETHIOPIC SYLLABLE FYA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
|
||
|
while (c < 0x137d) // ..to ETHIOPIC NUMBER TEN THOUSAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
|
||
|
while (c < 0x139a) // ..to ETHIOPIC TONAL MARK KURT
|
||
|
charset[i++] = c++;
|
||
|
c = 0x13a0; // from CHEROKEE LETTER A
|
||
|
while (c < 0x13f5) // ..to CHEROKEE LETTER YV
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
|
||
|
while (c < 0x169d) // ..to OGHAM REVERSED FEATHER MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
|
||
|
while (c < 0x16f1) // ..to RUNIC BELGTHOR SYMBOL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1700; // from TAGALOG LETTER A
|
||
|
while (c < 0x170d) // ..to TAGALOG LETTER YA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x170e; // from TAGALOG LETTER LA
|
||
|
while (c < 0x1715) // ..to TAGALOG SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1720; // from HANUNOO LETTER A
|
||
|
while (c < 0x1737) // ..to PHILIPPINE DOUBLE PUNCTUATION
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1740; // from BUHID LETTER A
|
||
|
while (c < 0x1754) // ..to BUHID VOWEL SIGN U
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1760; // from TAGBANWA LETTER A
|
||
|
while (c < 0x176d) // ..to TAGBANWA LETTER YA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x176e; // TAGBANWA LETTER LA
|
||
|
charset[i++] = 0x1770; // TAGBANWA LETTER SA
|
||
|
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
|
||
|
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
|
||
|
c = 0x1780; // from KHMER LETTER KA
|
||
|
while (c < 0x17de) // ..to KHMER SIGN ATTHACAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x17e0; // from KHMER DIGIT ZERO
|
||
|
while (c < 0x17ea) // ..to KHMER DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
|
||
|
while (c < 0x17fa) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1800; // from MONGOLIAN BIRGA
|
||
|
while (c < 0x180f) // ..to MONGOLIAN VOWEL SEPARATOR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1810; // from MONGOLIAN DIGIT ZERO
|
||
|
while (c < 0x181a) // ..to MONGOLIAN DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1820; // from MONGOLIAN LETTER A
|
||
|
while (c < 0x1878) // ..to MONGOLIAN LETTER MANCHU ZHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
|
||
|
while (c < 0x18ab) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x18b0; // from CANADIAN SYLLABICS OY
|
||
|
while (c < 0x18f6) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
|
||
|
while (c < 0x191d) // ..to LIMBU LETTER HA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1920; // from LIMBU VOWEL SIGN A
|
||
|
while (c < 0x192c) // ..to LIMBU SUBJOINED LETTER WA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1930; // from LIMBU SMALL LETTER KA
|
||
|
while (c < 0x193c) // ..to LIMBU SIGN SA-I
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1944; // from LIMBU EXCLAMATION MARK
|
||
|
while (c < 0x196e) // ..to TAI LE LETTER AI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1970; // from TAI LE LETTER TONE-2
|
||
|
while (c < 0x1975) // ..to TAI LE LETTER TONE-6
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
|
||
|
while (c < 0x19ac) // ..to NEW TAI LUE LETTER LOW SUA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
|
||
|
while (c < 0x19ca) // ..to NEW TAI LUE TONE MARK-2
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
|
||
|
while (c < 0x19db) // ..to NEW TAI LUE THAM DIGIT ONE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x19de; // from NEW TAI LUE SIGN LAE
|
||
|
while (c < 0x1a1c) // ..to BUGINESE VOWEL SIGN AE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a1e; // from BUGINESE PALLAWA
|
||
|
while (c < 0x1a5f) // ..to TAI THAM CONSONANT SIGN SA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a60; // from TAI THAM SIGN SAKOT
|
||
|
while (c < 0x1a7d) // ..to TAI THAM SIGN KHUEN-LUE KARAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||
|
while (c < 0x1a8a) // ..to TAI THAM HORA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
|
||
|
while (c < 0x1a9a) // ..to TAI THAM THAM DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1aa0; // from TAI THAM SIGN WIANG
|
||
|
while (c < 0x1aae) // ..to TAI THAM SIGN CAANG
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b00; // from BALINESE SIGN ULU RICEM
|
||
|
while (c < 0x1b4c) // ..to BALINESE LETTER ASYURA SASAK
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b50; // from BALINESE DIGIT ZERO
|
||
|
while (c < 0x1b7d) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
|
||
|
while (c < 0x1bab) // ..to SUNDANESE SIGN PAMAAEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bae; // from SUNDANESE LETTER KHA
|
||
|
while (c < 0x1bba) // ..to SUNDANESE DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bc0; // from BATAK LETTER A
|
||
|
while (c < 0x1bf4) // ..to BATAK PANONGONAN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
|
||
|
while (c < 0x1c38) // ..to LEPCHA SIGN NUKTA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
|
||
|
while (c < 0x1c4a) // ..to LEPCHA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1c4d; // from LEPCHA LETTER TTA
|
||
|
while (c < 0x1c80) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1cd0; // from VEDIC TONE KARSHANA
|
||
|
while (c < 0x1cf3) // ..to VEDIC SIGN ARDHAVISARGA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
|
||
|
while (c < 0x1de7) // ..to COMBINING LATIN SMALL LETTER Z
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1dfc; // from COMBINING DOUBLE INVERTED BREVE BELOW
|
||
|
while (c < 0x1f16) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
|
||
|
while (c < 0x1f1e) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
|
||
|
while (c < 0x1f46) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
|
||
|
while (c < 0x1f4e) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
|
||
|
while (c < 0x1f58) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||
|
while (c < 0x1f7e) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||
|
while (c < 0x1fb5) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||
|
while (c < 0x1fc5) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||
|
while (c < 0x1fd4) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||
|
while (c < 0x1fdc) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x1fdd; // from GREEK DASIA AND VARIA
|
||
|
while (c < 0x1ff0) // ..to GREEK VARIA
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||
|
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||
|
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||
|
while (c < 0x1fff) // ..to GREEK DASIA
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2000; // from EN QUAD
|
||
|
while (c < 0x2065) // ..to INVISIBLE PLUS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x206a; // from INHIBIT SYMMETRIC SWAPPING
|
||
|
while (c < 0x2072) // ..to SUPERSCRIPT LATIN SMALL LETTER I
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2074; // from SUPERSCRIPT FOUR
|
||
|
while (c < 0x208f) // ..to SUBSCRIPT RIGHT PARENTHESIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
|
||
|
while (c < 0x209d) // ..to LATIN SUBSCRIPT SMALL LETTER T
|
||
|
charset[i++] = c++;
|
||
|
c = 0x20a0; // from EURO-CURRENCY SIGN
|
||
|
while (c < 0x20ba) // ..to INDIAN RUPEE SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
|
||
|
while (c < 0x20f1) // ..to COMBINING ASTERISK ABOVE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2100; // from ACCOUNT OF
|
||
|
while (c < 0x218a) // ..to VULGAR FRACTION ZERO THIRDS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2190; // from LEFTWARDS ARROW
|
||
|
while (c < 0x23f4) // ..to HOURGLASS WITH FLOWING SAND
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2400; // from SYMBOL FOR NULL
|
||
|
while (c < 0x2427) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2440; // from OCR HOOK
|
||
|
while (c < 0x244b) // ..to OCR DOUBLE BACKSLASH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2460; // from CIRCLED DIGIT ONE
|
||
|
while (c < 0x2700) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2701; // from UPPER BLADE SCISSORS
|
||
|
while (c < 0x27cb) // ..to VERTICAL BAR WITH HORIZONTAL STROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x27ce; // from SQUARED LOGICAL AND
|
||
|
while (c < 0x2b4d) // ..to RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2b50; // from WHITE MEDIUM STAR
|
||
|
while (c < 0x2b5a) // ..to HEAVY CIRCLED SALTIRE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
|
||
|
while (c < 0x2c2f) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
|
||
|
while (c < 0x2c5f) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
|
||
|
while (c < 0x2cf2) // ..to COPTIC COMBINING SPIRITUS LENIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
|
||
|
while (c < 0x2d26) // ..to GEORGIAN SMALL LETTER HOE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2d30; // from TIFINAGH LETTER YA
|
||
|
while (c < 0x2d66) // ..to TIFINAGH LETTER YAZZ
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||
|
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
|
||
|
c = 0x2d7f; // from TIFINAGH CONSONANT JOINER
|
||
|
while (c < 0x2d97) // ..to ETHIOPIC SYLLABLE GGWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
|
||
|
while (c < 0x2da7) // ..to ETHIOPIC SYLLABLE SSO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
|
||
|
while (c < 0x2daf) // ..to ETHIOPIC SYLLABLE CCO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
|
||
|
while (c < 0x2db7) // ..to ETHIOPIC SYLLABLE ZZO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
|
||
|
while (c < 0x2dbf) // ..to ETHIOPIC SYLLABLE CCHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
|
||
|
while (c < 0x2dc7) // ..to ETHIOPIC SYLLABLE QYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
|
||
|
while (c < 0x2dcf) // ..to ETHIOPIC SYLLABLE KYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
|
||
|
while (c < 0x2dd7) // ..to ETHIOPIC SYLLABLE XYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
|
||
|
while (c < 0x2ddf) // ..to ETHIOPIC SYLLABLE GYO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
|
||
|
while (c < 0x2e32) // ..to WORD SEPARATOR MIDDLE DOT
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2e80; // from CJK RADICAL REPEAT
|
||
|
while (c < 0x2e9a) // ..to CJK RADICAL RAP
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2e9b; // from CJK RADICAL CHOKE
|
||
|
while (c < 0x2ef4) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2f00; // from KANGXI RADICAL ONE
|
||
|
while (c < 0x2fd6) // ..to KANGXI RADICAL FLUTE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
|
||
|
while (c < 0x2ffc) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3000; // from IDEOGRAPHIC SPACE
|
||
|
while (c < 0x3040) // ..to IDEOGRAPHIC HALF FILL SPACE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3041; // from HIRAGANA LETTER SMALL A
|
||
|
while (c < 0x3097) // ..to HIRAGANA LETTER SMALL KE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
|
||
|
while (c < 0x3100) // ..to KATAKANA DIGRAPH KOTO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3105; // from BOPOMOFO LETTER B
|
||
|
while (c < 0x312e) // ..to BOPOMOFO LETTER IH
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3131; // from HANGUL LETTER KIYEOK
|
||
|
while (c < 0x318f) // ..to HANGUL LETTER ARAEAE
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK
|
||
|
while (c < 0x31bb) // ..to BOPOMOFO LETTER ZY
|
||
|
charset[i++] = c++;
|
||
|
c = 0x31c0; // from CJK STROKE T
|
||
|
while (c < 0x31e4) // ..to CJK STROKE Q
|
||
|
charset[i++] = c++;
|
||
|
c = 0x31f0; // from KATAKANA LETTER SMALL KU
|
||
|
while (c < 0x321f) // ..to PARENTHESIZED KOREAN CHARACTER O HU
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE
|
||
|
while (c < 0x32ff) // ..to CIRCLED KATAKANA WO
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3300; // from SQUARE APAATO
|
||
|
while (c < 0x3400) // ..to SQUARE GAL
|
||
|
charset[i++] = c++;
|
||
|
c = 0x3400; // from <CJK Ideograph Extension A, First>
|
||
|
while (c < 0x4db6) // ..to <CJK Ideograph Extension A, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN
|
||
|
while (c < 0x4e00) // ..to HEXAGRAM FOR BEFORE COMPLETION
|
||
|
charset[i++] = c++;
|
||
|
c = 0x4e00; // from <CJK Ideograph, First>
|
||
|
while (c < 0x9fcc) // ..to <CJK Ideograph, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa000; // from YI SYLLABLE IT
|
||
|
while (c < 0xa48d) // ..to YI SYLLABLE YYR
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa490; // from YI RADICAL QOT
|
||
|
while (c < 0xa4c7) // ..to YI RADICAL KE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa4d0; // from LISU LETTER BA
|
||
|
while (c < 0xa62c) // ..to VAI SYLLABLE NDOLE DO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA
|
||
|
while (c < 0xa674) // ..to SLAVONIC ASTERISK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa67c; // from COMBINING CYRILLIC KAVYKA
|
||
|
while (c < 0xa698) // ..to CYRILLIC SMALL LETTER SHWE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa6a0; // from BAMUM LETTER A
|
||
|
while (c < 0xa6f8) // ..to BAMUM QUESTION MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING
|
||
|
while (c < 0xa78f) // ..to LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa790; // LATIN CAPITAL LETTER N WITH DESCENDER
|
||
|
charset[i++] = 0xa791; // LATIN SMALL LETTER N WITH DESCENDER
|
||
|
c = 0xa7a0; // from LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
|
||
|
while (c < 0xa7aa) // ..to LATIN SMALL LETTER S WITH OBLIQUE STROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa7fa; // from LATIN LETTER SMALL CAPITAL TURNED M
|
||
|
while (c < 0xa82c) // ..to SYLOTI NAGRI POETRY MARK-4
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER
|
||
|
while (c < 0xa83a) // ..to NORTH INDIC QUANTITY MARK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa840; // from PHAGS-PA LETTER KA
|
||
|
while (c < 0xa878) // ..to PHAGS-PA MARK DOUBLE SHAD
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa880; // from SAURASHTRA SIGN ANUSVARA
|
||
|
while (c < 0xa8c5) // ..to SAURASHTRA SIGN VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa8ce; // from SAURASHTRA DANDA
|
||
|
while (c < 0xa8da) // ..to SAURASHTRA DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO
|
||
|
while (c < 0xa8fc) // ..to DEVANAGARI HEADSTROKE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa900; // from KAYAH LI DIGIT ZERO
|
||
|
while (c < 0xa954) // ..to REJANG VIRAMA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa95f; // from REJANG SECTION MARK
|
||
|
while (c < 0xa97d) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa980; // from JAVANESE SIGN PANYANGGA
|
||
|
while (c < 0xa9ce) // ..to JAVANESE TURNED PADA PISELEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xa9cf; // from JAVANESE PANGRANGKEP
|
||
|
while (c < 0xa9da) // ..to JAVANESE DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES
|
||
|
charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN
|
||
|
c = 0xaa00; // from CHAM LETTER A
|
||
|
while (c < 0xaa37) // ..to CHAM CONSONANT SIGN WA
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa40; // from CHAM LETTER FINAL K
|
||
|
while (c < 0xaa4e) // ..to CHAM CONSONANT SIGN FINAL H
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa50; // from CHAM DIGIT ZERO
|
||
|
while (c < 0xaa5a) // ..to CHAM DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL
|
||
|
while (c < 0xaa7c) // ..to MYANMAR SIGN PAO KAREN TONE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaa80; // from TAI VIET LETTER LOW KO
|
||
|
while (c < 0xaac3) // ..to TAI VIET TONE MAI SONG
|
||
|
charset[i++] = c++;
|
||
|
c = 0xaadb; // from TAI VIET SYMBOL KON
|
||
|
while (c < 0xaae0) // ..to TAI VIET SYMBOL KOI KOI
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab01; // from ETHIOPIC SYLLABLE TTHU
|
||
|
while (c < 0xab07) // ..to ETHIOPIC SYLLABLE TTHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab09; // from ETHIOPIC SYLLABLE DDHU
|
||
|
while (c < 0xab0f) // ..to ETHIOPIC SYLLABLE DDHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab11; // from ETHIOPIC SYLLABLE DZU
|
||
|
while (c < 0xab17) // ..to ETHIOPIC SYLLABLE DZO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA
|
||
|
while (c < 0xab27) // ..to ETHIOPIC SYLLABLE CCHHO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xab28; // from ETHIOPIC SYLLABLE BBA
|
||
|
while (c < 0xab2f) // ..to ETHIOPIC SYLLABLE BBO
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabc0; // from MEETEI MAYEK LETTER KOK
|
||
|
while (c < 0xabee) // ..to MEETEI MAYEK APUN IYEK
|
||
|
charset[i++] = c++;
|
||
|
c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO
|
||
|
while (c < 0xabfa) // ..to MEETEI MAYEK DIGIT NINE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xac00; // from <Hangul Syllable, First>
|
||
|
while (c < 0xd7a4) // ..to <Hangul Syllable, Last>
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO
|
||
|
while (c < 0xd7c7) // ..to HANGUL JUNGSEONG ARAEA-E
|
||
|
charset[i++] = c++;
|
||
|
c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL
|
||
|
while (c < 0xd7fc) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900
|
||
|
while (c < 0xfa2e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfa30; // from CJK COMPATIBILITY IDEOGRAPH-FA30
|
||
|
while (c < 0xfa6e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70
|
||
|
while (c < 0xfada) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb00; // from LATIN SMALL LIGATURE FF
|
||
|
while (c < 0xfb07) // ..to LATIN SMALL LIGATURE ST
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW
|
||
|
while (c < 0xfb18) // ..to ARMENIAN SMALL LIGATURE MEN XEH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ
|
||
|
while (c < 0xfb37) // ..to HEBREW LETTER ZAYIN WITH DAGESH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH
|
||
|
while (c < 0xfb3d) // ..to HEBREW LETTER LAMED WITH DAGESH
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH
|
||
|
charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH
|
||
|
charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH
|
||
|
charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH
|
||
|
c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH
|
||
|
while (c < 0xfbc2) // ..to ARABIC SYMBOL SMALL TAH BELOW
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
|
||
|
while (c < 0xfd40) // ..to ORNATE RIGHT PARENTHESIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
|
||
|
while (c < 0xfd90) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
|
||
|
while (c < 0xfdc8) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
|
||
|
while (c < 0xfdfe) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe00; // from VARIATION SELECTOR-1
|
||
|
while (c < 0xfe1a) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe20; // from COMBINING LIGATURE LEFT HALF
|
||
|
while (c < 0xfe27) // ..to COMBINING CONJOINING MACRON
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
|
||
|
while (c < 0xfe53) // ..to SMALL FULL STOP
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe54; // from SMALL SEMICOLON
|
||
|
while (c < 0xfe67) // ..to SMALL EQUALS SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe68; // from SMALL REVERSE SOLIDUS
|
||
|
while (c < 0xfe6c) // ..to SMALL COMMERCIAL AT
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM
|
||
|
while (c < 0xfe75) // ..to ARABIC KASRATAN ISOLATED FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfe76; // from ARABIC FATHA ISOLATED FORM
|
||
|
while (c < 0xfefd) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||
|
charset[i++] = c++;
|
||
|
c = 0xff01; // from FULLWIDTH EXCLAMATION MARK
|
||
|
while (c < 0xffbf) // ..to HALFWIDTH HANGUL LETTER HIEUH
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffc2; // from HALFWIDTH HANGUL LETTER A
|
||
|
while (c < 0xffc8) // ..to HALFWIDTH HANGUL LETTER E
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO
|
||
|
while (c < 0xffd0) // ..to HALFWIDTH HANGUL LETTER OE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO
|
||
|
while (c < 0xffd8) // ..to HALFWIDTH HANGUL LETTER YU
|
||
|
charset[i++] = c++;
|
||
|
charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU
|
||
|
charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I
|
||
|
c = 0xffe0; // from FULLWIDTH CENT SIGN
|
||
|
while (c < 0xffe7) // ..to FULLWIDTH WON SIGN
|
||
|
charset[i++] = c++;
|
||
|
c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL
|
||
|
while (c < 0xffef) // ..to HALFWIDTH WHITE CIRCLE
|
||
|
charset[i++] = c++;
|
||
|
c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR
|
||
|
while (c < 0xfffe) // ..to REPLACEMENT CHARACTER
|
||
|
charset[i++] = c++;
|
||
|
|
||
|
/* Zero-terminate it, and cache the first character */
|
||
|
charset[i] = 0;
|
||
|
c0 = charset[0];
|
||
|
|
||
|
last = minlength - 1;
|
||
|
i = 0;
|
||
|
while (i <= last) {
|
||
|
id[i] = 0;
|
||
|
ucs2[i++] = c0;
|
||
|
}
|
||
|
lastid = -1;
|
||
|
ucs2[i] = 0;
|
||
|
|
||
|
/* We must init word with dummy data, it doesn't get set until filter() */
|
||
|
word = 1;
|
||
|
}
|
||
|
|
||
|
void generate()
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
/* Handle the typical case specially */
|
||
|
if (ucs2[last] = charset[++lastid]) return;
|
||
|
|
||
|
lastid = 0;
|
||
|
ucs2[i = last] = c0;
|
||
|
while (i--) { // Have a preceding position?
|
||
|
if (ucs2[i] = charset[++id[i]]) return;
|
||
|
id[i] = 0;
|
||
|
ucs2[i] = c0;
|
||
|
}
|
||
|
|
||
|
if (++last < maxlength) { // Next length?
|
||
|
id[last] = lastid = 0;
|
||
|
ucs2[last] = c0;
|
||
|
ucs2[last + 1] = 0;
|
||
|
} else // We're done
|
||
|
ucs2 = 0;
|
||
|
}
|
||
|
|
||
|
void restore()
|
||
|
{
|
||
|
int i, o, c;
|
||
|
|
||
|
/* Convert the restored word back from UTF-8 to UCS-2 */
|
||
|
i = o = 0;
|
||
|
while (c = word[i]) {
|
||
|
if (c >= 0xe0) {
|
||
|
c = (c << 6) + word[++i];
|
||
|
c = (c << 6) + word[++i];
|
||
|
c -= 0xE2080;
|
||
|
} else if (c >= 0xc0) {
|
||
|
c = (c << 6) + word[++i];
|
||
|
c -= 0x3080;
|
||
|
}
|
||
|
i++;
|
||
|
ucs2[o++] = c;
|
||
|
}
|
||
|
ucs2[o] = 0;
|
||
|
|
||
|
/* Calculate the current length and infer the character indices */
|
||
|
last = 0;
|
||
|
while (c = ucs2[last]) {
|
||
|
i = 0; while (charset[i] != c && charset[i]) i++;
|
||
|
if (!charset[i]) i = 0; // Not found
|
||
|
id[last++] = i;
|
||
|
}
|
||
|
lastid = id[--last];
|
||
|
}
|
||
|
|
||
|
/* Convert from UCS-2 to UTF-8 */
|
||
|
void filter()
|
||
|
{
|
||
|
int i, c;
|
||
|
i = -1; c = 0;
|
||
|
|
||
|
while (ucs2[++i]) {
|
||
|
if (ucs2[i] >= 0x0800) {
|
||
|
word[c++] = 0xe0 | (ucs2[i]>>12);
|
||
|
word[c++] = 0x80 | (ucs2[i]>>6 & 0x3f);
|
||
|
word[c++] = 0x80 | (ucs2[i] & 0x3f);
|
||
|
}
|
||
|
else if (ucs2[i] >= 0x80) {
|
||
|
word[c++] = 0xc0 | (ucs2[i]>>6);
|
||
|
word[c++] = 0x80 | (ucs2[i] & 0x3f);
|
||
|
}
|
||
|
else {
|
||
|
word[c++] = ucs2[i];
|
||
|
}
|
||
|
}
|
||
|
word[c] = 0;
|
||
|
}
|