etc-gentoo/john/dumb16.conf

1079 lines
41 KiB
Plaintext

# This software is Copyright (c) 2012 magnum, and it is hereby
# released to the general public under the following terms:
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.
#
# Generic implementation of "dumb" exhaustive search of Unicode/UCS-2 and
# an arbitrary charset. Default is to try *all* allocated characters (there
# are 54473 of them). Even if a fast format can exhaust two characters in 15
# minutes, three characters would take 1.5 years...
#
# The output is UTF-8, so for 16-bit formats you need to give --enc=utf8
[List.External:Dumb16]
int maxlength;        // Upper bound on the candidate password length
int last;             // Zero-based index of the final character position
int lastid;           // Charset index in use at the final position
int id[0x7f];         // Charset indices in use at every other position
int charset[0x10000]; // Code points to enumerate; init() fills this table
int c0;               // NOTE(review): presumably a cached charset entry - confirm where it is assigned
int ucs2[0x7f];       // Candidate word expressed in UCS-2
void init()
{
int minlength;
int i, c;
minlength = 1; // Initial password length to try, must be at least 1
maxlength = 2; // Must be at least same as minlength
/*
* This defines the character set. This is auto-generated from UnicodeData.txt
* and we skip control characters.
*/
i = 0;
c = 0x20; // from SPACE
while (c < 0x7f) // ..to TILDE
charset[i++] = c++;
c = 0xa0; // from NO-BREAK SPACE
while (c < 0x378) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
charset[i++] = c++;
c = 0x37a; // from GREEK YPOGEGRAMMENI
while (c < 0x37f) // ..to GREEK QUESTION MARK
charset[i++] = c++;
c = 0x384; // from GREEK TONOS
while (c < 0x38b) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
charset[i++] = c++;
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
while (c < 0x3a2) // ..to GREEK CAPITAL LETTER RHO
charset[i++] = c++;
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
while (c < 0x528) // ..to CYRILLIC SMALL LETTER SHHA WITH DESCENDER
charset[i++] = c++;
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
while (c < 0x557) // ..to ARMENIAN CAPITAL LETTER FEH
charset[i++] = c++;
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
while (c < 0x560) // ..to ARMENIAN ABBREVIATION MARK
charset[i++] = c++;
c = 0x561; // from ARMENIAN SMALL LETTER AYB
while (c < 0x588) // ..to ARMENIAN SMALL LIGATURE ECH YIWN
charset[i++] = c++;
charset[i++] = 0x589; // ARMENIAN FULL STOP
charset[i++] = 0x58a; // ARMENIAN HYPHEN
c = 0x591; // from HEBREW ACCENT ETNAHTA
while (c < 0x5c8) // ..to HEBREW POINT QAMATS QATAN
charset[i++] = c++;
c = 0x5d0; // from HEBREW LETTER ALEF
while (c < 0x5eb) // ..to HEBREW LETTER TAV
charset[i++] = c++;
c = 0x5f0; // from HEBREW LIGATURE YIDDISH DOUBLE VAV
while (c < 0x5f5) // ..to HEBREW PUNCTUATION GERSHAYIM
charset[i++] = c++;
c = 0x600; // from ARABIC NUMBER SIGN
while (c < 0x604) // ..to ARABIC SIGN SAFHA
charset[i++] = c++;
c = 0x606; // from ARABIC-INDIC CUBE ROOT
while (c < 0x61c) // ..to ARABIC SEMICOLON
charset[i++] = c++;
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
while (c < 0x70e) // ..to SYRIAC HARKLEAN ASTERISCUS
charset[i++] = c++;
c = 0x70f; // from SYRIAC ABBREVIATION MARK
while (c < 0x74b) // ..to SYRIAC BARREKH
charset[i++] = c++;
c = 0x74d; // from SYRIAC LETTER SOGDIAN ZHAIN
while (c < 0x7b2) // ..to THAANA LETTER NAA
charset[i++] = c++;
c = 0x7c0; // from NKO DIGIT ZERO
while (c < 0x7fb) // ..to NKO LAJANYALAN
charset[i++] = c++;
c = 0x800; // from SAMARITAN LETTER ALAF
while (c < 0x82e) // ..to SAMARITAN MARK NEQUDAA
charset[i++] = c++;
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
while (c < 0x83f) // ..to SAMARITAN PUNCTUATION ANNAAU
charset[i++] = c++;
c = 0x840; // from MANDAIC LETTER HALQA
while (c < 0x85c) // ..to MANDAIC GEMINATION MARK
charset[i++] = c++;
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
while (c < 0x978) // ..to DEVANAGARI LETTER UUE
charset[i++] = c++;
c = 0x979; // from DEVANAGARI LETTER ZHA
while (c < 0x980) // ..to DEVANAGARI LETTER BBA
charset[i++] = c++;
charset[i++] = 0x981; // BENGALI SIGN CANDRABINDU
charset[i++] = 0x983; // BENGALI SIGN VISARGA
c = 0x985; // from BENGALI LETTER A
while (c < 0x98d) // ..to BENGALI LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0x98f; // BENGALI LETTER E
charset[i++] = 0x990; // BENGALI LETTER AI
c = 0x993; // from BENGALI LETTER O
while (c < 0x9a9) // ..to BENGALI LETTER NA
charset[i++] = c++;
c = 0x9aa; // from BENGALI LETTER PA
while (c < 0x9b1) // ..to BENGALI LETTER RA
charset[i++] = c++;
c = 0x9b6; // from BENGALI LETTER SHA
while (c < 0x9ba) // ..to BENGALI LETTER HA
charset[i++] = c++;
c = 0x9bc; // from BENGALI SIGN NUKTA
while (c < 0x9c5) // ..to BENGALI VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
c = 0x9cb; // from BENGALI VOWEL SIGN O
while (c < 0x9cf) // ..to BENGALI LETTER KHANDA TA
charset[i++] = c++;
charset[i++] = 0x9dc; // BENGALI LETTER RRA
charset[i++] = 0x9dd; // BENGALI LETTER RHA
c = 0x9df; // from BENGALI LETTER YYA
while (c < 0x9e4) // ..to BENGALI VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0x9e6; // from BENGALI DIGIT ZERO
while (c < 0x9fc) // ..to BENGALI GANDA MARK
charset[i++] = c++;
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
c = 0xa05; // from GURMUKHI LETTER A
while (c < 0xa0b) // ..to GURMUKHI LETTER UU
charset[i++] = c++;
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
charset[i++] = 0xa10; // GURMUKHI LETTER AI
c = 0xa13; // from GURMUKHI LETTER OO
while (c < 0xa29) // ..to GURMUKHI LETTER NA
charset[i++] = c++;
c = 0xa2a; // from GURMUKHI LETTER PA
while (c < 0xa31) // ..to GURMUKHI LETTER RA
charset[i++] = c++;
charset[i++] = 0xa32; // GURMUKHI LETTER LA
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
charset[i++] = 0xa35; // GURMUKHI LETTER VA
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
charset[i++] = 0xa38; // GURMUKHI LETTER SA
charset[i++] = 0xa39; // GURMUKHI LETTER HA
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
while (c < 0xa43) // ..to GURMUKHI VOWEL SIGN UU
charset[i++] = c++;
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
c = 0xa59; // from GURMUKHI LETTER KHHA
while (c < 0xa5d) // ..to GURMUKHI LETTER RRA
charset[i++] = c++;
c = 0xa66; // from GURMUKHI DIGIT ZERO
while (c < 0xa76) // ..to GURMUKHI SIGN YAKASH
charset[i++] = c++;
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
c = 0xa85; // from GUJARATI LETTER A
while (c < 0xa8e) // ..to GUJARATI VOWEL CANDRA E
charset[i++] = c++;
charset[i++] = 0xa8f; // GUJARATI LETTER E
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
c = 0xa93; // from GUJARATI LETTER O
while (c < 0xaa9) // ..to GUJARATI LETTER NA
charset[i++] = c++;
c = 0xaaa; // from GUJARATI LETTER PA
while (c < 0xab1) // ..to GUJARATI LETTER RA
charset[i++] = c++;
charset[i++] = 0xab2; // GUJARATI LETTER LA
charset[i++] = 0xab3; // GUJARATI LETTER LLA
c = 0xab5; // from GUJARATI LETTER VA
while (c < 0xaba) // ..to GUJARATI LETTER HA
charset[i++] = c++;
c = 0xabc; // from GUJARATI SIGN NUKTA
while (c < 0xac6) // ..to GUJARATI VOWEL SIGN CANDRA E
charset[i++] = c++;
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
while (c < 0xae4) // ..to GUJARATI VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xae6; // from GUJARATI DIGIT ZERO
while (c < 0xaf0) // ..to GUJARATI DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
c = 0xb05; // from ORIYA LETTER A
while (c < 0xb0d) // ..to ORIYA LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xb0f; // ORIYA LETTER E
charset[i++] = 0xb10; // ORIYA LETTER AI
c = 0xb13; // from ORIYA LETTER O
while (c < 0xb29) // ..to ORIYA LETTER NA
charset[i++] = c++;
c = 0xb2a; // from ORIYA LETTER PA
while (c < 0xb31) // ..to ORIYA LETTER RA
charset[i++] = c++;
charset[i++] = 0xb32; // ORIYA LETTER LA
charset[i++] = 0xb33; // ORIYA LETTER LLA
c = 0xb35; // from ORIYA LETTER VA
while (c < 0xb3a) // ..to ORIYA LETTER HA
charset[i++] = c++;
c = 0xb3c; // from ORIYA SIGN NUKTA
while (c < 0xb45) // ..to ORIYA VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
charset[i++] = 0xb56; // ORIYA AI LENGTH MARK
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
charset[i++] = 0xb5c; // ORIYA LETTER RRA
charset[i++] = 0xb5d; // ORIYA LETTER RHA
c = 0xb5f; // from ORIYA LETTER YYA
while (c < 0xb64) // ..to ORIYA VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xb66; // from ORIYA DIGIT ZERO
while (c < 0xb78) // ..to ORIYA FRACTION THREE SIXTEENTHS
charset[i++] = c++;
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
c = 0xb85; // from TAMIL LETTER A
while (c < 0xb8b) // ..to TAMIL LETTER UU
charset[i++] = c++;
charset[i++] = 0xb8e; // TAMIL LETTER E
charset[i++] = 0xb90; // TAMIL LETTER AI
c = 0xb92; // from TAMIL LETTER O
while (c < 0xb96) // ..to TAMIL LETTER KA
charset[i++] = c++;
charset[i++] = 0xb99; // TAMIL LETTER NGA
charset[i++] = 0xb9a; // TAMIL LETTER CA
charset[i++] = 0xb9e; // TAMIL LETTER NYA
charset[i++] = 0xb9f; // TAMIL LETTER TTA
charset[i++] = 0xba3; // TAMIL LETTER NNA
charset[i++] = 0xba4; // TAMIL LETTER TA
charset[i++] = 0xba8; // TAMIL LETTER NA
charset[i++] = 0xbaa; // TAMIL LETTER PA
c = 0xbae; // from TAMIL LETTER MA
while (c < 0xbba) // ..to TAMIL LETTER HA
charset[i++] = c++;
c = 0xbbe; // from TAMIL VOWEL SIGN AA
while (c < 0xbc3) // ..to TAMIL VOWEL SIGN UU
charset[i++] = c++;
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
c = 0xbca; // from TAMIL VOWEL SIGN O
while (c < 0xbce) // ..to TAMIL SIGN VIRAMA
charset[i++] = c++;
c = 0xbe6; // from TAMIL DIGIT ZERO
while (c < 0xbfb) // ..to TAMIL NUMBER SIGN
charset[i++] = c++;
charset[i++] = 0xc01; // TELUGU SIGN CANDRABINDU
charset[i++] = 0xc03; // TELUGU SIGN VISARGA
c = 0xc05; // from TELUGU LETTER A
while (c < 0xc0d) // ..to TELUGU LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xc0e; // TELUGU LETTER E
charset[i++] = 0xc10; // TELUGU LETTER AI
c = 0xc12; // from TELUGU LETTER O
while (c < 0xc29) // ..to TELUGU LETTER NA
charset[i++] = c++;
c = 0xc2a; // from TELUGU LETTER PA
while (c < 0xc34) // ..to TELUGU LETTER LLA
charset[i++] = c++;
c = 0xc35; // from TELUGU LETTER VA
while (c < 0xc3a) // ..to TELUGU LETTER HA
charset[i++] = c++;
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
while (c < 0xc45) // ..to TELUGU VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
c = 0xc4a; // from TELUGU VOWEL SIGN O
while (c < 0xc4e) // ..to TELUGU SIGN VIRAMA
charset[i++] = c++;
charset[i++] = 0xc55; // TELUGU LENGTH MARK
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
charset[i++] = 0xc58; // TELUGU LETTER TSA
charset[i++] = 0xc59; // TELUGU LETTER DZA
c = 0xc60; // from TELUGU LETTER VOCALIC RR
while (c < 0xc64) // ..to TELUGU VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xc66; // from TELUGU DIGIT ZERO
while (c < 0xc70) // ..to TELUGU DIGIT NINE
charset[i++] = c++;
c = 0xc78; // from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR
while (c < 0xc80) // ..to TELUGU SIGN TUUMU
charset[i++] = c++;
charset[i++] = 0xc82; // KANNADA SIGN ANUSVARA
charset[i++] = 0xc83; // KANNADA SIGN VISARGA
c = 0xc85; // from KANNADA LETTER A
while (c < 0xc8d) // ..to KANNADA LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xc8e; // KANNADA LETTER E
charset[i++] = 0xc90; // KANNADA LETTER AI
c = 0xc92; // from KANNADA LETTER O
while (c < 0xca9) // ..to KANNADA LETTER NA
charset[i++] = c++;
c = 0xcaa; // from KANNADA LETTER PA
while (c < 0xcb4) // ..to KANNADA LETTER LLA
charset[i++] = c++;
c = 0xcb5; // from KANNADA LETTER VA
while (c < 0xcba) // ..to KANNADA LETTER HA
charset[i++] = c++;
c = 0xcbc; // from KANNADA SIGN NUKTA
while (c < 0xcc5) // ..to KANNADA VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
c = 0xcca; // from KANNADA VOWEL SIGN O
while (c < 0xcce) // ..to KANNADA SIGN VIRAMA
charset[i++] = c++;
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
c = 0xce0; // from KANNADA LETTER VOCALIC RR
while (c < 0xce4) // ..to KANNADA VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xce6; // from KANNADA DIGIT ZERO
while (c < 0xcf0) // ..to KANNADA DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
charset[i++] = 0xd02; // MALAYALAM SIGN ANUSVARA
charset[i++] = 0xd03; // MALAYALAM SIGN VISARGA
c = 0xd05; // from MALAYALAM LETTER A
while (c < 0xd0d) // ..to MALAYALAM LETTER VOCALIC L
charset[i++] = c++;
charset[i++] = 0xd0e; // MALAYALAM LETTER E
charset[i++] = 0xd10; // MALAYALAM LETTER AI
c = 0xd12; // from MALAYALAM LETTER O
while (c < 0xd3b) // ..to MALAYALAM LETTER TTTA
charset[i++] = c++;
c = 0xd3d; // from MALAYALAM SIGN AVAGRAHA
while (c < 0xd45) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
charset[i++] = c++;
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
while (c < 0xd4f) // ..to MALAYALAM LETTER DOT REPH
charset[i++] = c++;
c = 0xd60; // from MALAYALAM LETTER VOCALIC RR
while (c < 0xd64) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
charset[i++] = c++;
c = 0xd66; // from MALAYALAM DIGIT ZERO
while (c < 0xd76) // ..to MALAYALAM FRACTION THREE QUARTERS
charset[i++] = c++;
c = 0xd79; // from MALAYALAM DATE MARK
while (c < 0xd80) // ..to MALAYALAM LETTER CHILLU K
charset[i++] = c++;
charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
c = 0xd85; // from SINHALA LETTER AYANNA
while (c < 0xd97) // ..to SINHALA LETTER AUYANNA
charset[i++] = c++;
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
while (c < 0xdb2) // ..to SINHALA LETTER DANTAJA NAYANNA
charset[i++] = c++;
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
while (c < 0xdbc) // ..to SINHALA LETTER RAYANNA
charset[i++] = c++;
c = 0xdc0; // from SINHALA LETTER VAYANNA
while (c < 0xdc7) // ..to SINHALA LETTER FAYANNA
charset[i++] = c++;
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
while (c < 0xdd5) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
charset[i++] = c++;
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
while (c < 0xde0) // ..to SINHALA VOWEL SIGN GAYANUKITTA
charset[i++] = c++;
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
c = 0xe01; // from THAI CHARACTER KO KAI
while (c < 0xe3b) // ..to THAI CHARACTER PHINTHU
charset[i++] = c++;
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
while (c < 0xe5c) // ..to THAI CHARACTER KHOMUT
charset[i++] = c++;
charset[i++] = 0xe81; // LAO LETTER KO
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
charset[i++] = 0xe87; // LAO LETTER NGO
charset[i++] = 0xe88; // LAO LETTER CO
c = 0xe94; // from LAO LETTER DO
while (c < 0xe98) // ..to LAO LETTER THO TAM
charset[i++] = c++;
c = 0xe99; // from LAO LETTER NO
while (c < 0xea0) // ..to LAO LETTER FO SUNG
charset[i++] = c++;
charset[i++] = 0xea1; // LAO LETTER MO
charset[i++] = 0xea3; // LAO LETTER LO LING
charset[i++] = 0xeaa; // LAO LETTER SO SUNG
charset[i++] = 0xeab; // LAO LETTER HO SUNG
c = 0xead; // from LAO LETTER O
while (c < 0xeba) // ..to LAO VOWEL SIGN UU
charset[i++] = c++;
charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON
charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO
c = 0xec0; // from LAO VOWEL SIGN E
while (c < 0xec5) // ..to LAO VOWEL SIGN AI
charset[i++] = c++;
c = 0xec8; // from LAO TONE MAI EK
while (c < 0xece) // ..to LAO NIGGAHITA
charset[i++] = c++;
c = 0xed0; // from LAO DIGIT ZERO
while (c < 0xeda) // ..to LAO DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xedc; // LAO HO NO
charset[i++] = 0xedd; // LAO HO MO
c = 0xf00; // from TIBETAN SYLLABLE OM
while (c < 0xf48) // ..to TIBETAN LETTER JA
charset[i++] = c++;
c = 0xf49; // from TIBETAN LETTER NYA
while (c < 0xf6d) // ..to TIBETAN LETTER RRA
charset[i++] = c++;
c = 0xf71; // from TIBETAN VOWEL SIGN AA
while (c < 0xf98) // ..to TIBETAN SUBJOINED LETTER JA
charset[i++] = c++;
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
while (c < 0xfbd) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
charset[i++] = c++;
c = 0xfbe; // from TIBETAN KU RU KHA
while (c < 0xfcd) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
charset[i++] = c++;
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
while (c < 0xfdb) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
charset[i++] = c++;
c = 0x1000; // from MYANMAR LETTER KA
while (c < 0x10c6) // ..to GEORGIAN CAPITAL LETTER HOE
charset[i++] = c++;
c = 0x10d0; // from GEORGIAN LETTER AN
while (c < 0x10fd) // ..to MODIFIER LETTER GEORGIAN NAR
charset[i++] = c++;
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
while (c < 0x1249) // ..to ETHIOPIC SYLLABLE QWA
charset[i++] = c++;
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
while (c < 0x124e) // ..to ETHIOPIC SYLLABLE QWE
charset[i++] = c++;
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
while (c < 0x1257) // ..to ETHIOPIC SYLLABLE QHO
charset[i++] = c++;
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
while (c < 0x125e) // ..to ETHIOPIC SYLLABLE QHWE
charset[i++] = c++;
c = 0x1260; // from ETHIOPIC SYLLABLE BA
while (c < 0x1289) // ..to ETHIOPIC SYLLABLE XWA
charset[i++] = c++;
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
while (c < 0x128e) // ..to ETHIOPIC SYLLABLE XWE
charset[i++] = c++;
c = 0x1290; // from ETHIOPIC SYLLABLE NA
while (c < 0x12b1) // ..to ETHIOPIC SYLLABLE KWA
charset[i++] = c++;
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
while (c < 0x12b6) // ..to ETHIOPIC SYLLABLE KWE
charset[i++] = c++;
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
while (c < 0x12bf) // ..to ETHIOPIC SYLLABLE KXO
charset[i++] = c++;
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
while (c < 0x12c6) // ..to ETHIOPIC SYLLABLE KXWE
charset[i++] = c++;
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
while (c < 0x12d7) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
charset[i++] = c++;
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
while (c < 0x1311) // ..to ETHIOPIC SYLLABLE GWA
charset[i++] = c++;
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
while (c < 0x1316) // ..to ETHIOPIC SYLLABLE GWE
charset[i++] = c++;
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
while (c < 0x135b) // ..to ETHIOPIC SYLLABLE FYA
charset[i++] = c++;
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
while (c < 0x137d) // ..to ETHIOPIC NUMBER TEN THOUSAND
charset[i++] = c++;
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
while (c < 0x139a) // ..to ETHIOPIC TONAL MARK KURT
charset[i++] = c++;
c = 0x13a0; // from CHEROKEE LETTER A
while (c < 0x13f5) // ..to CHEROKEE LETTER YV
charset[i++] = c++;
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
while (c < 0x169d) // ..to OGHAM REVERSED FEATHER MARK
charset[i++] = c++;
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
while (c < 0x16f1) // ..to RUNIC BELGTHOR SYMBOL
charset[i++] = c++;
c = 0x1700; // from TAGALOG LETTER A
while (c < 0x170d) // ..to TAGALOG LETTER YA
charset[i++] = c++;
c = 0x170e; // from TAGALOG LETTER LA
while (c < 0x1715) // ..to TAGALOG SIGN VIRAMA
charset[i++] = c++;
c = 0x1720; // from HANUNOO LETTER A
while (c < 0x1737) // ..to PHILIPPINE DOUBLE PUNCTUATION
charset[i++] = c++;
c = 0x1740; // from BUHID LETTER A
while (c < 0x1754) // ..to BUHID VOWEL SIGN U
charset[i++] = c++;
c = 0x1760; // from TAGBANWA LETTER A
while (c < 0x176d) // ..to TAGBANWA LETTER YA
charset[i++] = c++;
charset[i++] = 0x176e; // TAGBANWA LETTER LA
charset[i++] = 0x1770; // TAGBANWA LETTER SA
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
c = 0x1780; // from KHMER LETTER KA
while (c < 0x17de) // ..to KHMER SIGN ATTHACAN
charset[i++] = c++;
c = 0x17e0; // from KHMER DIGIT ZERO
while (c < 0x17ea) // ..to KHMER DIGIT NINE
charset[i++] = c++;
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
while (c < 0x17fa) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
charset[i++] = c++;
c = 0x1800; // from MONGOLIAN BIRGA
while (c < 0x180f) // ..to MONGOLIAN VOWEL SEPARATOR
charset[i++] = c++;
c = 0x1810; // from MONGOLIAN DIGIT ZERO
while (c < 0x181a) // ..to MONGOLIAN DIGIT NINE
charset[i++] = c++;
c = 0x1820; // from MONGOLIAN LETTER A
while (c < 0x1878) // ..to MONGOLIAN LETTER MANCHU ZHA
charset[i++] = c++;
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
while (c < 0x18ab) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
charset[i++] = c++;
c = 0x18b0; // from CANADIAN SYLLABICS OY
while (c < 0x18f6) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
charset[i++] = c++;
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
while (c < 0x191d) // ..to LIMBU LETTER HA
charset[i++] = c++;
c = 0x1920; // from LIMBU VOWEL SIGN A
while (c < 0x192c) // ..to LIMBU SUBJOINED LETTER WA
charset[i++] = c++;
c = 0x1930; // from LIMBU SMALL LETTER KA
while (c < 0x193c) // ..to LIMBU SIGN SA-I
charset[i++] = c++;
c = 0x1944; // from LIMBU EXCLAMATION MARK
while (c < 0x196e) // ..to TAI LE LETTER AI
charset[i++] = c++;
c = 0x1970; // from TAI LE LETTER TONE-2
while (c < 0x1975) // ..to TAI LE LETTER TONE-6
charset[i++] = c++;
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
while (c < 0x19ac) // ..to NEW TAI LUE LETTER LOW SUA
charset[i++] = c++;
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
while (c < 0x19ca) // ..to NEW TAI LUE TONE MARK-2
charset[i++] = c++;
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
while (c < 0x19db) // ..to NEW TAI LUE THAM DIGIT ONE
charset[i++] = c++;
c = 0x19de; // from NEW TAI LUE SIGN LAE
while (c < 0x1a1c) // ..to BUGINESE VOWEL SIGN AE
charset[i++] = c++;
c = 0x1a1e; // from BUGINESE PALLAWA
while (c < 0x1a5f) // ..to TAI THAM CONSONANT SIGN SA
charset[i++] = c++;
c = 0x1a60; // from TAI THAM SIGN SAKOT
while (c < 0x1a7d) // ..to TAI THAM SIGN KHUEN-LUE KARAN
charset[i++] = c++;
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
while (c < 0x1a8a) // ..to TAI THAM HORA DIGIT NINE
charset[i++] = c++;
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
while (c < 0x1a9a) // ..to TAI THAM THAM DIGIT NINE
charset[i++] = c++;
c = 0x1aa0; // from TAI THAM SIGN WIANG
while (c < 0x1aae) // ..to TAI THAM SIGN CAANG
charset[i++] = c++;
c = 0x1b00; // from BALINESE SIGN ULU RICEM
while (c < 0x1b4c) // ..to BALINESE LETTER ASYURA SASAK
charset[i++] = c++;
c = 0x1b50; // from BALINESE DIGIT ZERO
while (c < 0x1b7d) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
charset[i++] = c++;
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
while (c < 0x1bab) // ..to SUNDANESE SIGN PAMAAEH
charset[i++] = c++;
c = 0x1bae; // from SUNDANESE LETTER KHA
while (c < 0x1bba) // ..to SUNDANESE DIGIT NINE
charset[i++] = c++;
c = 0x1bc0; // from BATAK LETTER A
while (c < 0x1bf4) // ..to BATAK PANONGONAN
charset[i++] = c++;
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
while (c < 0x1c38) // ..to LEPCHA SIGN NUKTA
charset[i++] = c++;
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
while (c < 0x1c4a) // ..to LEPCHA DIGIT NINE
charset[i++] = c++;
c = 0x1c4d; // from LEPCHA LETTER TTA
while (c < 0x1c80) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
charset[i++] = c++;
c = 0x1cd0; // from VEDIC TONE KARSHANA
while (c < 0x1cf3) // ..to VEDIC SIGN ARDHAVISARGA
charset[i++] = c++;
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
while (c < 0x1de7) // ..to COMBINING LATIN SMALL LETTER Z
charset[i++] = c++;
c = 0x1dfc; // from COMBINING DOUBLE INVERTED BREVE BELOW
while (c < 0x1f16) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
while (c < 0x1f1e) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
while (c < 0x1f46) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
while (c < 0x1f4e) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
charset[i++] = c++;
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
while (c < 0x1f58) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
charset[i++] = c++;
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
while (c < 0x1f7e) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
charset[i++] = c++;
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
while (c < 0x1fb5) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
charset[i++] = c++;
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
while (c < 0x1fc5) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
charset[i++] = c++;
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
while (c < 0x1fd4) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
charset[i++] = c++;
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
while (c < 0x1fdc) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
charset[i++] = c++;
c = 0x1fdd; // from GREEK DASIA AND VARIA
while (c < 0x1ff0) // ..to GREEK VARIA
charset[i++] = c++;
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
while (c < 0x1fff) // ..to GREEK DASIA
charset[i++] = c++;
c = 0x2000; // from EN QUAD
while (c < 0x2065) // ..to INVISIBLE PLUS
charset[i++] = c++;
c = 0x206a; // from INHIBIT SYMMETRIC SWAPPING
while (c < 0x2072) // ..to SUPERSCRIPT LATIN SMALL LETTER I
charset[i++] = c++;
c = 0x2074; // from SUPERSCRIPT FOUR
while (c < 0x208f) // ..to SUBSCRIPT RIGHT PARENTHESIS
charset[i++] = c++;
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
while (c < 0x209d) // ..to LATIN SUBSCRIPT SMALL LETTER T
charset[i++] = c++;
c = 0x20a0; // from EURO-CURRENCY SIGN
while (c < 0x20ba) // ..to INDIAN RUPEE SIGN
charset[i++] = c++;
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
while (c < 0x20f1) // ..to COMBINING ASTERISK ABOVE
charset[i++] = c++;
c = 0x2100; // from ACCOUNT OF
while (c < 0x218a) // ..to VULGAR FRACTION ZERO THIRDS
charset[i++] = c++;
c = 0x2190; // from LEFTWARDS ARROW
while (c < 0x23f4) // ..to HOURGLASS WITH FLOWING SAND
charset[i++] = c++;
c = 0x2400; // from SYMBOL FOR NULL
while (c < 0x2427) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
charset[i++] = c++;
c = 0x2440; // from OCR HOOK
while (c < 0x244b) // ..to OCR DOUBLE BACKSLASH
charset[i++] = c++;
c = 0x2460; // from CIRCLED DIGIT ONE
while (c < 0x2700) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
charset[i++] = c++;
c = 0x2701; // from UPPER BLADE SCISSORS
while (c < 0x27cb) // ..to VERTICAL BAR WITH HORIZONTAL STROKE
charset[i++] = c++;
c = 0x27ce; // from SQUARED LOGICAL AND
while (c < 0x2b4d) // ..to RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
charset[i++] = c++;
c = 0x2b50; // from WHITE MEDIUM STAR
while (c < 0x2b5a) // ..to HEAVY CIRCLED SALTIRE
charset[i++] = c++;
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
while (c < 0x2c2f) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
charset[i++] = c++;
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
while (c < 0x2c5f) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
charset[i++] = c++;
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
while (c < 0x2cf2) // ..to COPTIC COMBINING SPIRITUS LENIS
charset[i++] = c++;
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
while (c < 0x2d26) // ..to GEORGIAN SMALL LETTER HOE
charset[i++] = c++;
c = 0x2d30; // from TIFINAGH LETTER YA
while (c < 0x2d66) // ..to TIFINAGH LETTER YAZZ
charset[i++] = c++;
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
c = 0x2d7f; // from TIFINAGH CONSONANT JOINER
while (c < 0x2d97) // ..to ETHIOPIC SYLLABLE GGWE
charset[i++] = c++;
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
while (c < 0x2da7) // ..to ETHIOPIC SYLLABLE SSO
charset[i++] = c++;
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
while (c < 0x2daf) // ..to ETHIOPIC SYLLABLE CCO
charset[i++] = c++;
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
while (c < 0x2db7) // ..to ETHIOPIC SYLLABLE ZZO
charset[i++] = c++;
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
while (c < 0x2dbf) // ..to ETHIOPIC SYLLABLE CCHO
charset[i++] = c++;
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
while (c < 0x2dc7) // ..to ETHIOPIC SYLLABLE QYO
charset[i++] = c++;
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
while (c < 0x2dcf) // ..to ETHIOPIC SYLLABLE KYO
charset[i++] = c++;
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
while (c < 0x2dd7) // ..to ETHIOPIC SYLLABLE XYO
charset[i++] = c++;
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
while (c < 0x2ddf) // ..to ETHIOPIC SYLLABLE GYO
charset[i++] = c++;
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
while (c < 0x2e32) // ..to WORD SEPARATOR MIDDLE DOT
charset[i++] = c++;
c = 0x2e80; // from CJK RADICAL REPEAT
while (c < 0x2e9a) // ..to CJK RADICAL RAP
charset[i++] = c++;
c = 0x2e9b; // from CJK RADICAL CHOKE
while (c < 0x2ef4) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
charset[i++] = c++;
c = 0x2f00; // from KANGXI RADICAL ONE
while (c < 0x2fd6) // ..to KANGXI RADICAL FLUTE
charset[i++] = c++;
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
while (c < 0x2ffc) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
charset[i++] = c++;
c = 0x3000; // from IDEOGRAPHIC SPACE
while (c < 0x3040) // ..to IDEOGRAPHIC HALF FILL SPACE
charset[i++] = c++;
c = 0x3041; // from HIRAGANA LETTER SMALL A
while (c < 0x3097) // ..to HIRAGANA LETTER SMALL KE
charset[i++] = c++;
c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
while (c < 0x3100) // ..to KATAKANA DIGRAPH KOTO
charset[i++] = c++;
c = 0x3105; // from BOPOMOFO LETTER B
while (c < 0x312e) // ..to BOPOMOFO LETTER IH
charset[i++] = c++;
c = 0x3131; // from HANGUL LETTER KIYEOK
while (c < 0x318f) // ..to HANGUL LETTER ARAEAE
charset[i++] = c++;
c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK
while (c < 0x31bb) // ..to BOPOMOFO LETTER ZY
charset[i++] = c++;
c = 0x31c0; // from CJK STROKE T
while (c < 0x31e4) // ..to CJK STROKE Q
charset[i++] = c++;
c = 0x31f0; // from KATAKANA LETTER SMALL KU
while (c < 0x321f) // ..to PARENTHESIZED KOREAN CHARACTER O HU
charset[i++] = c++;
c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE
while (c < 0x32ff) // ..to CIRCLED KATAKANA WO
charset[i++] = c++;
c = 0x3300; // from SQUARE APAATO
while (c < 0x3400) // ..to SQUARE GAL
charset[i++] = c++;
c = 0x3400; // from <CJK Ideograph Extension A, First>
while (c < 0x4db6) // ..to <CJK Ideograph Extension A, Last>
charset[i++] = c++;
c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN
while (c < 0x4e00) // ..to HEXAGRAM FOR BEFORE COMPLETION
charset[i++] = c++;
c = 0x4e00; // from <CJK Ideograph, First>
while (c < 0x9fcc) // ..to <CJK Ideograph, Last>
charset[i++] = c++;
c = 0xa000; // from YI SYLLABLE IT
while (c < 0xa48d) // ..to YI SYLLABLE YYR
charset[i++] = c++;
c = 0xa490; // from YI RADICAL QOT
while (c < 0xa4c7) // ..to YI RADICAL KE
charset[i++] = c++;
c = 0xa4d0; // from LISU LETTER BA
while (c < 0xa62c) // ..to VAI SYLLABLE NDOLE DO
charset[i++] = c++;
c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA
while (c < 0xa674) // ..to SLAVONIC ASTERISK
charset[i++] = c++;
c = 0xa67c; // from COMBINING CYRILLIC KAVYKA
while (c < 0xa698) // ..to CYRILLIC SMALL LETTER SHWE
charset[i++] = c++;
c = 0xa6a0; // from BAMUM LETTER A
while (c < 0xa6f8) // ..to BAMUM QUESTION MARK
charset[i++] = c++;
c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING
while (c < 0xa78f) // ..to LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
charset[i++] = c++;
charset[i++] = 0xa790; // LATIN CAPITAL LETTER N WITH DESCENDER
charset[i++] = 0xa791; // LATIN SMALL LETTER N WITH DESCENDER
c = 0xa7a0; // from LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
while (c < 0xa7aa) // ..to LATIN SMALL LETTER S WITH OBLIQUE STROKE
charset[i++] = c++;
c = 0xa7fa; // from LATIN LETTER SMALL CAPITAL TURNED M
while (c < 0xa82c) // ..to SYLOTI NAGRI POETRY MARK-4
charset[i++] = c++;
c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER
while (c < 0xa83a) // ..to NORTH INDIC QUANTITY MARK
charset[i++] = c++;
c = 0xa840; // from PHAGS-PA LETTER KA
while (c < 0xa878) // ..to PHAGS-PA MARK DOUBLE SHAD
charset[i++] = c++;
c = 0xa880; // from SAURASHTRA SIGN ANUSVARA
while (c < 0xa8c5) // ..to SAURASHTRA SIGN VIRAMA
charset[i++] = c++;
c = 0xa8ce; // from SAURASHTRA DANDA
while (c < 0xa8da) // ..to SAURASHTRA DIGIT NINE
charset[i++] = c++;
c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO
while (c < 0xa8fc) // ..to DEVANAGARI HEADSTROKE
charset[i++] = c++;
c = 0xa900; // from KAYAH LI DIGIT ZERO
while (c < 0xa954) // ..to REJANG VIRAMA
charset[i++] = c++;
c = 0xa95f; // from REJANG SECTION MARK
while (c < 0xa97d) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH
charset[i++] = c++;
c = 0xa980; // from JAVANESE SIGN PANYANGGA
while (c < 0xa9ce) // ..to JAVANESE TURNED PADA PISELEH
charset[i++] = c++;
c = 0xa9cf; // from JAVANESE PANGRANGKEP
while (c < 0xa9da) // ..to JAVANESE DIGIT NINE
charset[i++] = c++;
charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES
charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN
c = 0xaa00; // from CHAM LETTER A
while (c < 0xaa37) // ..to CHAM CONSONANT SIGN WA
charset[i++] = c++;
c = 0xaa40; // from CHAM LETTER FINAL K
while (c < 0xaa4e) // ..to CHAM CONSONANT SIGN FINAL H
charset[i++] = c++;
c = 0xaa50; // from CHAM DIGIT ZERO
while (c < 0xaa5a) // ..to CHAM DIGIT NINE
charset[i++] = c++;
c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL
while (c < 0xaa7c) // ..to MYANMAR SIGN PAO KAREN TONE
charset[i++] = c++;
c = 0xaa80; // from TAI VIET LETTER LOW KO
while (c < 0xaac3) // ..to TAI VIET TONE MAI SONG
charset[i++] = c++;
c = 0xaadb; // from TAI VIET SYMBOL KON
while (c < 0xaae0) // ..to TAI VIET SYMBOL KOI KOI
charset[i++] = c++;
c = 0xab01; // from ETHIOPIC SYLLABLE TTHU
while (c < 0xab07) // ..to ETHIOPIC SYLLABLE TTHO
charset[i++] = c++;
c = 0xab09; // from ETHIOPIC SYLLABLE DDHU
while (c < 0xab0f) // ..to ETHIOPIC SYLLABLE DDHO
charset[i++] = c++;
c = 0xab11; // from ETHIOPIC SYLLABLE DZU
while (c < 0xab17) // ..to ETHIOPIC SYLLABLE DZO
charset[i++] = c++;
c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA
while (c < 0xab27) // ..to ETHIOPIC SYLLABLE CCHHO
charset[i++] = c++;
c = 0xab28; // from ETHIOPIC SYLLABLE BBA
while (c < 0xab2f) // ..to ETHIOPIC SYLLABLE BBO
charset[i++] = c++;
c = 0xabc0; // from MEETEI MAYEK LETTER KOK
while (c < 0xabee) // ..to MEETEI MAYEK APUN IYEK
charset[i++] = c++;
c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO
while (c < 0xabfa) // ..to MEETEI MAYEK DIGIT NINE
charset[i++] = c++;
c = 0xac00; // from <Hangul Syllable, First>
while (c < 0xd7a4) // ..to <Hangul Syllable, Last>
charset[i++] = c++;
c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO
while (c < 0xd7c7) // ..to HANGUL JUNGSEONG ARAEA-E
charset[i++] = c++;
c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL
while (c < 0xd7fc) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
charset[i++] = c++;
c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900
while (c < 0xfa2e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA2D
charset[i++] = c++;
c = 0xfa30; // from CJK COMPATIBILITY IDEOGRAPH-FA30
while (c < 0xfa6e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
charset[i++] = c++;
c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70
while (c < 0xfada) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
charset[i++] = c++;
c = 0xfb00; // from LATIN SMALL LIGATURE FF
while (c < 0xfb07) // ..to LATIN SMALL LIGATURE ST
charset[i++] = c++;
c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW
while (c < 0xfb18) // ..to ARMENIAN SMALL LIGATURE MEN XEH
charset[i++] = c++;
c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ
while (c < 0xfb37) // ..to HEBREW LETTER ZAYIN WITH DAGESH
charset[i++] = c++;
c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH
while (c < 0xfb3d) // ..to HEBREW LETTER LAMED WITH DAGESH
charset[i++] = c++;
charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH
charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH
charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH
charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH
c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH
while (c < 0xfbc2) // ..to ARABIC SYMBOL SMALL TAH BELOW
charset[i++] = c++;
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
while (c < 0xfd40) // ..to ORNATE RIGHT PARENTHESIS
charset[i++] = c++;
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
while (c < 0xfd90) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
charset[i++] = c++;
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
while (c < 0xfdc8) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
charset[i++] = c++;
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
while (c < 0xfdfe) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
charset[i++] = c++;
c = 0xfe00; // from VARIATION SELECTOR-1
while (c < 0xfe1a) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
charset[i++] = c++;
c = 0xfe20; // from COMBINING LIGATURE LEFT HALF
while (c < 0xfe27) // ..to COMBINING CONJOINING MACRON
charset[i++] = c++;
c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
while (c < 0xfe53) // ..to SMALL FULL STOP
charset[i++] = c++;
c = 0xfe54; // from SMALL SEMICOLON
while (c < 0xfe67) // ..to SMALL EQUALS SIGN
charset[i++] = c++;
c = 0xfe68; // from SMALL REVERSE SOLIDUS
while (c < 0xfe6c) // ..to SMALL COMMERCIAL AT
charset[i++] = c++;
c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM
while (c < 0xfe75) // ..to ARABIC KASRATAN ISOLATED FORM
charset[i++] = c++;
c = 0xfe76; // from ARABIC FATHA ISOLATED FORM
while (c < 0xfefd) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
charset[i++] = c++;
c = 0xff01; // from FULLWIDTH EXCLAMATION MARK
while (c < 0xffbf) // ..to HALFWIDTH HANGUL LETTER HIEUH
charset[i++] = c++;
c = 0xffc2; // from HALFWIDTH HANGUL LETTER A
while (c < 0xffc8) // ..to HALFWIDTH HANGUL LETTER E
charset[i++] = c++;
c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO
while (c < 0xffd0) // ..to HALFWIDTH HANGUL LETTER OE
charset[i++] = c++;
c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO
while (c < 0xffd8) // ..to HALFWIDTH HANGUL LETTER YU
charset[i++] = c++;
charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU
charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I
c = 0xffe0; // from FULLWIDTH CENT SIGN
while (c < 0xffe7) // ..to FULLWIDTH WON SIGN
charset[i++] = c++;
c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL
while (c < 0xffef) // ..to HALFWIDTH WHITE CIRCLE
charset[i++] = c++;
c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR
while (c < 0xfffe) // ..to REPLACEMENT CHARACTER
charset[i++] = c++;
/* Zero-terminate it, and cache the first character */
charset[i] = 0;
c0 = charset[0];
last = minlength - 1;
i = 0;
while (i <= last) {
id[i] = 0;
ucs2[i++] = c0;
}
lastid = -1;
ucs2[i] = 0;
/* We must init word with dummy data, it doesn't get set until filter() */
word = 1;
}
void generate()
{
int i;
/* Handle the typical case specially */
if (ucs2[last] = charset[++lastid]) return;
lastid = 0;
ucs2[i = last] = c0;
while (i--) { // Have a preceding position?
if (ucs2[i] = charset[++id[i]]) return;
id[i] = 0;
ucs2[i] = c0;
}
if (++last < maxlength) { // Next length?
id[last] = lastid = 0;
ucs2[last] = c0;
ucs2[last + 1] = 0;
} else // We're done
ucs2 = 0;
}
/*
 * Rebuild the generator state (ucs2[], id[], last, lastid) from the UTF-8
 * string found in "word", so that generate() continues after that candidate.
 */
void restore()
{
	int i, o, c;

/*
 * An empty word carries no position at all. Without this guard the index
 * loop below never runs, "last" ends up as -1 and id[-1] is read. Keep the
 * current state instead (presumably the one set up by init(); confirm that
 * init() runs before restore()).
 */
	if (!word[0]) return;

/* Convert the restored word back from UTF-8 to UCS-2 */
	i = o = 0;
	while (c = word[i]) {
		if (c >= 0xe0) {		// Three-byte sequence
			c = (c << 6) + word[++i];
			c = (c << 6) + word[++i];
			c -= 0xE2080;		// Drop the 0xE0, 0x80, 0x80 marker bits
		} else if (c >= 0xc0) {		// Two-byte sequence
			c = (c << 6) + word[++i];
			c -= 0x3080;		// Drop the 0xC0, 0x80 marker bits
		}
		i++;
		ucs2[o++] = c;
	}
	ucs2[o] = 0;

/* Calculate the current length and infer the character indices */
	last = 0;
	while (c = ucs2[last]) {
		i = 0; while (charset[i] != c && charset[i]) i++;
		if (!charset[i]) i = 0;	// Not found, fall back to the first character
		id[last++] = i;
	}
/* "last" is now the length (at least 1 here); make it the last position */
	lastid = id[--last];
}
/*
 * Encode the zero-terminated UCS-2 candidate in ucs2[] as UTF-8 into "word".
 * Each code unit becomes one, two or three bytes depending on its value.
 */
void filter()
{
	int src, dst, u;

	src = 0;
	dst = 0;
	while (u = ucs2[src++]) {
		if (u < 0x80) {
/* Below 0x80: stored as a single byte */
			word[dst++] = u;
		} else if (u < 0x0800) {
/* 0x80..0x7ff: lead byte with the high bits, one continuation byte */
			word[dst++] = 0xc0 | (u >> 6);
			word[dst++] = 0x80 | (u & 0x3f);
		} else {
/* 0x800 and up: lead byte plus two continuation bytes of six bits each */
			word[dst++] = 0xe0 | (u >> 12);
			word[dst++] = 0x80 | ((u >> 6) & 0x3f);
			word[dst++] = 0x80 | (u & 0x3f);
		}
	}
	word[dst] = 0;
}