# This software is Copyright (c) 2012 magnum, and it is hereby # released to the general public under the following terms: # Redistribution and use in source and binary forms, with or without # modification, are permitted. # # Generic implementation of "dumb" exhaustive search of FULL Unicode and # an arbitrary charset. Default is to try *all* allocated characters (there's # 109070 of them). Even if a fast format can exhaust two characters in one # hour, three characters would take 12 years... # # The output is UTF-8, so for 16-bit formats you need to give --enc=utf8 [List.External:Dumb32] int maxlength; // Maximum password length to try int last; // Last character position, zero-based int lastid; // Character index in the last position int id[0x7f]; // Current character indices for other positions int charset[0x20000], c0; // Characters int utf32[0x7F]; // Word in UTF32 void init() { int minlength; int i, c; minlength = 1; // Initial password length to try, must be at least 1 maxlength = 2; // Must be at least same as minlength /* * This defines the character set. This is auto-generated from UnicodeData.txt * and we skip control characters. */ i = 0; c = 0x20; // from SPACE while (c < 0x7f) // ..to TILDE charset[i++] = c++; c = 0xa0; // from NO-BREAK SPACE while (c < 0x378) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA charset[i++] = c++; c = 0x37a; // from GREEK YPOGEGRAMMENI while (c < 0x37f) // ..to GREEK QUESTION MARK charset[i++] = c++; c = 0x384; // from GREEK TONOS while (c < 0x38b) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS charset[i++] = c++; c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS while (c < 0x3a2) // ..to GREEK CAPITAL LETTER RHO charset[i++] = c++; c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA while (c < 0x528) // ..to CYRILLIC SMALL LETTER SHHA WITH DESCENDER charset[i++] = c++; c = 0x531; // from ARMENIAN CAPITAL LETTER AYB while (c < 0x557) // ..to ARMENIAN CAPITAL LETTER FEH charset[i++] = c++; c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING while (c < 0x560) // ..to ARMENIAN ABBREVIATION MARK charset[i++] = c++; c = 0x561; // from ARMENIAN SMALL LETTER AYB while (c < 0x588) // ..to ARMENIAN SMALL LIGATURE ECH YIWN charset[i++] = c++; charset[i++] = 0x589; // ARMENIAN FULL STOP charset[i++] = 0x58a; // ARMENIAN HYPHEN c = 0x591; // from HEBREW ACCENT ETNAHTA while (c < 0x5c8) // ..to HEBREW POINT QAMATS QATAN charset[i++] = c++; c = 0x5d0; // from HEBREW LETTER ALEF while (c < 0x5eb) // ..to HEBREW LETTER TAV charset[i++] = c++; c = 0x5f0; // from HEBREW LIGATURE YIDDISH DOUBLE VAV while (c < 0x5f5) // ..to HEBREW PUNCTUATION GERSHAYIM charset[i++] = c++; c = 0x600; // from ARABIC NUMBER SIGN while (c < 0x604) // ..to ARABIC SIGN SAFHA charset[i++] = c++; c = 0x606; // from ARABIC-INDIC CUBE ROOT while (c < 0x61c) // ..to ARABIC SEMICOLON charset[i++] = c++; c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK while (c < 0x70e) // ..to SYRIAC HARKLEAN ASTERISCUS charset[i++] = c++; c = 0x70f; // from SYRIAC ABBREVIATION MARK while (c < 0x74b) // ..to SYRIAC BARREKH charset[i++] = c++; c = 0x74d; // from SYRIAC LETTER SOGDIAN ZHAIN while (c < 0x7b2) // ..to THAANA LETTER NAA charset[i++] = c++; c = 0x7c0; // from NKO DIGIT ZERO while (c < 0x7fb) // ..to NKO LAJANYALAN charset[i++] = c++; c = 0x800; // from SAMARITAN LETTER ALAF while (c < 0x82e) // ..to SAMARITAN MARK NEQUDAA charset[i++] = c++; c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA while (c < 0x83f) // ..to SAMARITAN PUNCTUATION ANNAAU charset[i++] = c++; c = 0x840; // from MANDAIC LETTER HALQA while (c < 0x85c) // ..to MANDAIC GEMINATION MARK charset[i++] = c++; c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU while (c < 0x978) // ..to DEVANAGARI LETTER UUE charset[i++] = c++; c = 0x979; // from DEVANAGARI LETTER ZHA while (c < 0x980) // ..to DEVANAGARI LETTER BBA charset[i++] = c++; charset[i++] = 0x981; // BENGALI SIGN CANDRABINDU charset[i++] = 0x983; // BENGALI SIGN VISARGA c = 0x985; // from BENGALI LETTER A while (c < 0x98d) // ..to BENGALI LETTER VOCALIC L charset[i++] = c++; charset[i++] = 0x98f; // BENGALI LETTER E charset[i++] = 0x990; // BENGALI LETTER AI c = 0x993; // from BENGALI LETTER O while (c < 0x9a9) // ..to BENGALI LETTER NA charset[i++] = c++; c = 0x9aa; // from BENGALI LETTER PA while (c < 0x9b1) // ..to BENGALI LETTER RA charset[i++] = c++; c = 0x9b6; // from BENGALI LETTER SHA while (c < 0x9ba) // ..to BENGALI LETTER HA charset[i++] = c++; c = 0x9bc; // from BENGALI SIGN NUKTA while (c < 0x9c5) // ..to BENGALI VOWEL SIGN VOCALIC RR charset[i++] = c++; charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI c = 0x9cb; // from BENGALI VOWEL SIGN O while (c < 0x9cf) // ..to BENGALI LETTER KHANDA TA charset[i++] = c++; charset[i++] = 0x9dc; // BENGALI LETTER RRA charset[i++] = 0x9dd; // BENGALI LETTER RHA c = 0x9df; // from BENGALI LETTER YYA while (c < 0x9e4) // ..to BENGALI VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0x9e6; // from BENGALI DIGIT ZERO while (c < 0x9fc) // ..to BENGALI GANDA MARK charset[i++] = c++; charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA c = 0xa05; // from GURMUKHI LETTER A while (c < 0xa0b) // ..to GURMUKHI LETTER UU charset[i++] = c++; charset[i++] = 0xa0f; // GURMUKHI LETTER EE charset[i++] = 0xa10; // GURMUKHI LETTER AI c = 0xa13; // from GURMUKHI LETTER OO while (c < 0xa29) // ..to GURMUKHI LETTER NA charset[i++] = c++; c = 0xa2a; // from GURMUKHI LETTER PA while (c < 0xa31) // ..to GURMUKHI LETTER RA charset[i++] = c++; charset[i++] = 0xa32; // GURMUKHI LETTER LA charset[i++] = 0xa33; // GURMUKHI LETTER LLA charset[i++] = 0xa35; // GURMUKHI LETTER VA charset[i++] = 0xa36; // GURMUKHI LETTER SHA charset[i++] = 0xa38; // GURMUKHI LETTER SA charset[i++] = 0xa39; // GURMUKHI LETTER HA c = 0xa3e; // from GURMUKHI VOWEL SIGN AA while (c < 0xa43) // ..to GURMUKHI VOWEL SIGN UU charset[i++] = c++; charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA c = 0xa59; // from GURMUKHI LETTER KHHA while (c < 0xa5d) // ..to GURMUKHI LETTER RRA charset[i++] = c++; c = 0xa66; // from GURMUKHI DIGIT ZERO while (c < 0xa76) // ..to GURMUKHI SIGN YAKASH charset[i++] = c++; charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU charset[i++] = 0xa83; // GUJARATI SIGN VISARGA c = 0xa85; // from GUJARATI LETTER A while (c < 0xa8e) // ..to GUJARATI VOWEL CANDRA E charset[i++] = c++; charset[i++] = 0xa8f; // GUJARATI LETTER E charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O c = 0xa93; // from GUJARATI LETTER O while (c < 0xaa9) // ..to GUJARATI LETTER NA charset[i++] = c++; c = 0xaaa; // from GUJARATI LETTER PA while (c < 0xab1) // ..to GUJARATI LETTER RA charset[i++] = c++; charset[i++] = 0xab2; // GUJARATI LETTER LA charset[i++] = 0xab3; // GUJARATI LETTER LLA c = 0xab5; // from GUJARATI LETTER VA while (c < 0xaba) // ..to GUJARATI LETTER HA charset[i++] = c++; c = 0xabc; // from GUJARATI SIGN NUKTA while (c < 0xac6) // ..to GUJARATI VOWEL SIGN CANDRA E charset[i++] = c++; charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA c = 0xae0; // from GUJARATI LETTER VOCALIC RR while (c < 0xae4) // ..to GUJARATI VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0xae6; // from GUJARATI DIGIT ZERO while (c < 0xaf0) // ..to GUJARATI DIGIT NINE charset[i++] = c++; charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU charset[i++] = 0xb03; // ORIYA SIGN VISARGA c = 0xb05; // from ORIYA LETTER A while (c < 0xb0d) // ..to ORIYA LETTER VOCALIC L charset[i++] = c++; charset[i++] = 0xb0f; // ORIYA LETTER E charset[i++] = 0xb10; // ORIYA LETTER AI c = 0xb13; // from ORIYA LETTER O while (c < 0xb29) // ..to ORIYA LETTER NA charset[i++] = c++; c = 0xb2a; // from ORIYA LETTER PA while (c < 0xb31) // ..to ORIYA LETTER RA charset[i++] = c++; charset[i++] = 0xb32; // ORIYA LETTER LA charset[i++] = 0xb33; // ORIYA LETTER LLA c = 0xb35; // from ORIYA LETTER VA while (c < 0xb3a) // ..to ORIYA LETTER HA charset[i++] = c++; c = 0xb3c; // from ORIYA SIGN NUKTA while (c < 0xb45) // ..to ORIYA VOWEL SIGN VOCALIC RR charset[i++] = c++; charset[i++] = 0xb47; // ORIYA VOWEL SIGN E charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA charset[i++] = 0xb56; // ORIYA AI LENGTH MARK charset[i++] = 0xb57; // ORIYA AU LENGTH MARK charset[i++] = 0xb5c; // ORIYA LETTER RRA charset[i++] = 0xb5d; // ORIYA LETTER RHA c = 0xb5f; // from ORIYA LETTER YYA while (c < 0xb64) // ..to ORIYA VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0xb66; // from ORIYA DIGIT ZERO while (c < 0xb78) // ..to ORIYA FRACTION THREE SIXTEENTHS charset[i++] = c++; charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA charset[i++] = 0xb83; // TAMIL SIGN VISARGA c = 0xb85; // from TAMIL LETTER A while (c < 0xb8b) // ..to TAMIL LETTER UU charset[i++] = c++; charset[i++] = 0xb8e; // TAMIL LETTER E charset[i++] = 0xb90; // TAMIL LETTER AI c = 0xb92; // from TAMIL LETTER O while (c < 0xb96) // ..to TAMIL LETTER KA charset[i++] = c++; charset[i++] = 0xb99; // TAMIL LETTER NGA charset[i++] = 0xb9a; // TAMIL LETTER CA charset[i++] = 0xb9e; // TAMIL LETTER NYA charset[i++] = 0xb9f; // TAMIL LETTER TTA charset[i++] = 0xba3; // TAMIL LETTER NNA charset[i++] = 0xba4; // TAMIL LETTER TA charset[i++] = 0xba8; // TAMIL LETTER NA charset[i++] = 0xbaa; // TAMIL LETTER PA c = 0xbae; // from TAMIL LETTER MA while (c < 0xbba) // ..to TAMIL LETTER HA charset[i++] = c++; c = 0xbbe; // from TAMIL VOWEL SIGN AA while (c < 0xbc3) // ..to TAMIL VOWEL SIGN UU charset[i++] = c++; charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI c = 0xbca; // from TAMIL VOWEL SIGN O while (c < 0xbce) // ..to TAMIL SIGN VIRAMA charset[i++] = c++; c = 0xbe6; // from TAMIL DIGIT ZERO while (c < 0xbfb) // ..to TAMIL NUMBER SIGN charset[i++] = c++; charset[i++] = 0xc01; // TELUGU SIGN CANDRABINDU charset[i++] = 0xc03; // TELUGU SIGN VISARGA c = 0xc05; // from TELUGU LETTER A while (c < 0xc0d) // ..to TELUGU LETTER VOCALIC L charset[i++] = c++; charset[i++] = 0xc0e; // TELUGU LETTER E charset[i++] = 0xc10; // TELUGU LETTER AI c = 0xc12; // from TELUGU LETTER O while (c < 0xc29) // ..to TELUGU LETTER NA charset[i++] = c++; c = 0xc2a; // from TELUGU LETTER PA while (c < 0xc34) // ..to TELUGU LETTER LLA charset[i++] = c++; c = 0xc35; // from TELUGU LETTER VA while (c < 0xc3a) // ..to TELUGU LETTER HA charset[i++] = c++; c = 0xc3d; // from TELUGU SIGN AVAGRAHA while (c < 0xc45) // ..to TELUGU VOWEL SIGN VOCALIC RR charset[i++] = c++; charset[i++] = 0xc46; // TELUGU VOWEL SIGN E charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI c = 0xc4a; // from TELUGU VOWEL SIGN O while (c < 0xc4e) // ..to TELUGU SIGN VIRAMA charset[i++] = c++; charset[i++] = 0xc55; // TELUGU LENGTH MARK charset[i++] = 0xc56; // TELUGU AI LENGTH MARK charset[i++] = 0xc58; // TELUGU LETTER TSA charset[i++] = 0xc59; // TELUGU LETTER DZA c = 0xc60; // from TELUGU LETTER VOCALIC RR while (c < 0xc64) // ..to TELUGU VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0xc66; // from TELUGU DIGIT ZERO while (c < 0xc70) // ..to TELUGU DIGIT NINE charset[i++] = c++; c = 0xc78; // from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR while (c < 0xc80) // ..to TELUGU SIGN TUUMU charset[i++] = c++; charset[i++] = 0xc82; // KANNADA SIGN ANUSVARA charset[i++] = 0xc83; // KANNADA SIGN VISARGA c = 0xc85; // from KANNADA LETTER A while (c < 0xc8d) // ..to KANNADA LETTER VOCALIC L charset[i++] = c++; charset[i++] = 0xc8e; // KANNADA LETTER E charset[i++] = 0xc90; // KANNADA LETTER AI c = 0xc92; // from KANNADA LETTER O while (c < 0xca9) // ..to KANNADA LETTER NA charset[i++] = c++; c = 0xcaa; // from KANNADA LETTER PA while (c < 0xcb4) // ..to KANNADA LETTER LLA charset[i++] = c++; c = 0xcb5; // from KANNADA LETTER VA while (c < 0xcba) // ..to KANNADA LETTER HA charset[i++] = c++; c = 0xcbc; // from KANNADA SIGN NUKTA while (c < 0xcc5) // ..to KANNADA VOWEL SIGN VOCALIC RR charset[i++] = c++; charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI c = 0xcca; // from KANNADA VOWEL SIGN O while (c < 0xcce) // ..to KANNADA SIGN VIRAMA charset[i++] = c++; charset[i++] = 0xcd5; // KANNADA LENGTH MARK charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK c = 0xce0; // from KANNADA LETTER VOCALIC RR while (c < 0xce4) // ..to KANNADA VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0xce6; // from KANNADA DIGIT ZERO while (c < 0xcf0) // ..to KANNADA DIGIT NINE charset[i++] = c++; charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA charset[i++] = 0xd02; // MALAYALAM SIGN ANUSVARA charset[i++] = 0xd03; // MALAYALAM SIGN VISARGA c = 0xd05; // from MALAYALAM LETTER A while (c < 0xd0d) // ..to MALAYALAM LETTER VOCALIC L charset[i++] = c++; charset[i++] = 0xd0e; // MALAYALAM LETTER E charset[i++] = 0xd10; // MALAYALAM LETTER AI c = 0xd12; // from MALAYALAM LETTER O while (c < 0xd3b) // ..to MALAYALAM LETTER TTTA charset[i++] = c++; c = 0xd3d; // from MALAYALAM SIGN AVAGRAHA while (c < 0xd45) // ..to MALAYALAM VOWEL SIGN VOCALIC RR charset[i++] = c++; charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI c = 0xd4a; // from MALAYALAM VOWEL SIGN O while (c < 0xd4f) // ..to MALAYALAM LETTER DOT REPH charset[i++] = c++; c = 0xd60; // from MALAYALAM LETTER VOCALIC RR while (c < 0xd64) // ..to MALAYALAM VOWEL SIGN VOCALIC LL charset[i++] = c++; c = 0xd66; // from MALAYALAM DIGIT ZERO while (c < 0xd76) // ..to MALAYALAM FRACTION THREE QUARTERS charset[i++] = c++; c = 0xd79; // from MALAYALAM DATE MARK while (c < 0xd80) // ..to MALAYALAM LETTER CHILLU K charset[i++] = c++; charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA c = 0xd85; // from SINHALA LETTER AYANNA while (c < 0xd97) // ..to SINHALA LETTER AUYANNA charset[i++] = c++; c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA while (c < 0xdb2) // ..to SINHALA LETTER DANTAJA NAYANNA charset[i++] = c++; c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA while (c < 0xdbc) // ..to SINHALA LETTER RAYANNA charset[i++] = c++; c = 0xdc0; // from SINHALA LETTER VAYANNA while (c < 0xdc7) // ..to SINHALA LETTER FAYANNA charset[i++] = c++; c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA while (c < 0xdd5) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA charset[i++] = c++; c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA while (c < 0xde0) // ..to SINHALA VOWEL SIGN GAYANUKITTA charset[i++] = c++; charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA c = 0xe01; // from THAI CHARACTER KO KAI while (c < 0xe3b) // ..to THAI CHARACTER PHINTHU charset[i++] = c++; c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT while (c < 0xe5c) // ..to THAI CHARACTER KHOMUT charset[i++] = c++; charset[i++] = 0xe81; // LAO LETTER KO charset[i++] = 0xe82; // LAO LETTER KHO SUNG charset[i++] = 0xe87; // LAO LETTER NGO charset[i++] = 0xe88; // LAO LETTER CO c = 0xe94; // from LAO LETTER DO while (c < 0xe98) // ..to LAO LETTER THO TAM charset[i++] = c++; c = 0xe99; // from LAO LETTER NO while (c < 0xea0) // ..to LAO LETTER FO SUNG charset[i++] = c++; charset[i++] = 0xea1; // LAO LETTER MO charset[i++] = 0xea3; // LAO LETTER LO LING charset[i++] = 0xeaa; // LAO LETTER SO SUNG charset[i++] = 0xeab; // LAO LETTER HO SUNG c = 0xead; // from LAO LETTER O while (c < 0xeba) // ..to LAO VOWEL SIGN UU charset[i++] = c++; charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO c = 0xec0; // from LAO VOWEL SIGN E while (c < 0xec5) // ..to LAO VOWEL SIGN AI charset[i++] = c++; c = 0xec8; // from LAO TONE MAI EK while (c < 0xece) // ..to LAO NIGGAHITA charset[i++] = c++; c = 0xed0; // from LAO DIGIT ZERO while (c < 0xeda) // ..to LAO DIGIT NINE charset[i++] = c++; charset[i++] = 0xedc; // LAO HO NO charset[i++] = 0xedd; // LAO HO MO c = 0xf00; // from TIBETAN SYLLABLE OM while (c < 0xf48) // ..to TIBETAN LETTER JA charset[i++] = c++; c = 0xf49; // from TIBETAN LETTER NYA while (c < 0xf6d) // ..to TIBETAN LETTER RRA charset[i++] = c++; c = 0xf71; // from TIBETAN VOWEL SIGN AA while (c < 0xf98) // ..to TIBETAN SUBJOINED LETTER JA charset[i++] = c++; c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA while (c < 0xfbd) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA charset[i++] = c++; c = 0xfbe; // from TIBETAN KU RU KHA while (c < 0xfcd) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL charset[i++] = c++; c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR while (c < 0xfdb) // ..to TIBETAN MARK TRAILING MCHAN RTAGS charset[i++] = c++; c = 0x1000; // from MYANMAR LETTER KA while (c < 0x10c6) // ..to GEORGIAN CAPITAL LETTER HOE charset[i++] = c++; c = 0x10d0; // from GEORGIAN LETTER AN while (c < 0x10fd) // ..to MODIFIER LETTER GEORGIAN NAR charset[i++] = c++; c = 0x1100; // from HANGUL CHOSEONG KIYEOK while (c < 0x1249) // ..to ETHIOPIC SYLLABLE QWA charset[i++] = c++; c = 0x124a; // from ETHIOPIC SYLLABLE QWI while (c < 0x124e) // ..to ETHIOPIC SYLLABLE QWE charset[i++] = c++; c = 0x1250; // from ETHIOPIC SYLLABLE QHA while (c < 0x1257) // ..to ETHIOPIC SYLLABLE QHO charset[i++] = c++; c = 0x125a; // from ETHIOPIC SYLLABLE QHWI while (c < 0x125e) // ..to ETHIOPIC SYLLABLE QHWE charset[i++] = c++; c = 0x1260; // from ETHIOPIC SYLLABLE BA while (c < 0x1289) // ..to ETHIOPIC SYLLABLE XWA charset[i++] = c++; c = 0x128a; // from ETHIOPIC SYLLABLE XWI while (c < 0x128e) // ..to ETHIOPIC SYLLABLE XWE charset[i++] = c++; c = 0x1290; // from ETHIOPIC SYLLABLE NA while (c < 0x12b1) // ..to ETHIOPIC SYLLABLE KWA charset[i++] = c++; c = 0x12b2; // from ETHIOPIC SYLLABLE KWI while (c < 0x12b6) // ..to ETHIOPIC SYLLABLE KWE charset[i++] = c++; c = 0x12b8; // from ETHIOPIC SYLLABLE KXA while (c < 0x12bf) // ..to ETHIOPIC SYLLABLE KXO charset[i++] = c++; c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI while (c < 0x12c6) // ..to ETHIOPIC SYLLABLE KXWE charset[i++] = c++; c = 0x12c8; // from ETHIOPIC SYLLABLE WA while (c < 0x12d7) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O charset[i++] = c++; c = 0x12d8; // from ETHIOPIC SYLLABLE ZA while (c < 0x1311) // ..to ETHIOPIC SYLLABLE GWA charset[i++] = c++; c = 0x1312; // from ETHIOPIC SYLLABLE GWI while (c < 0x1316) // ..to ETHIOPIC SYLLABLE GWE charset[i++] = c++; c = 0x1318; // from ETHIOPIC SYLLABLE GGA while (c < 0x135b) // ..to ETHIOPIC SYLLABLE FYA charset[i++] = c++; c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK while (c < 0x137d) // ..to ETHIOPIC NUMBER TEN THOUSAND charset[i++] = c++; c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA while (c < 0x139a) // ..to ETHIOPIC TONAL MARK KURT charset[i++] = c++; c = 0x13a0; // from CHEROKEE LETTER A while (c < 0x13f5) // ..to CHEROKEE LETTER YV charset[i++] = c++; c = 0x1400; // from CANADIAN SYLLABICS HYPHEN while (c < 0x169d) // ..to OGHAM REVERSED FEATHER MARK charset[i++] = c++; c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F while (c < 0x16f1) // ..to RUNIC BELGTHOR SYMBOL charset[i++] = c++; c = 0x1700; // from TAGALOG LETTER A while (c < 0x170d) // ..to TAGALOG LETTER YA charset[i++] = c++; c = 0x170e; // from TAGALOG LETTER LA while (c < 0x1715) // ..to TAGALOG SIGN VIRAMA charset[i++] = c++; c = 0x1720; // from HANUNOO LETTER A while (c < 0x1737) // ..to PHILIPPINE DOUBLE PUNCTUATION charset[i++] = c++; c = 0x1740; // from BUHID LETTER A while (c < 0x1754) // ..to BUHID VOWEL SIGN U charset[i++] = c++; c = 0x1760; // from TAGBANWA LETTER A while (c < 0x176d) // ..to TAGBANWA LETTER YA charset[i++] = c++; charset[i++] = 0x176e; // TAGBANWA LETTER LA charset[i++] = 0x1770; // TAGBANWA LETTER SA charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U c = 0x1780; // from KHMER LETTER KA while (c < 0x17de) // ..to KHMER SIGN ATTHACAN charset[i++] = c++; c = 0x17e0; // from KHMER DIGIT ZERO while (c < 0x17ea) // ..to KHMER DIGIT NINE charset[i++] = c++; c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON while (c < 0x17fa) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON charset[i++] = c++; c = 0x1800; // from MONGOLIAN BIRGA while (c < 0x180f) // ..to MONGOLIAN VOWEL SEPARATOR charset[i++] = c++; c = 0x1810; // from MONGOLIAN DIGIT ZERO while (c < 0x181a) // ..to MONGOLIAN DIGIT NINE charset[i++] = c++; c = 0x1820; // from MONGOLIAN LETTER A while (c < 0x1878) // ..to MONGOLIAN LETTER MANCHU ZHA charset[i++] = c++; c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE while (c < 0x18ab) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA charset[i++] = c++; c = 0x18b0; // from CANADIAN SYLLABICS OY while (c < 0x18f6) // ..to CANADIAN SYLLABICS CARRIER DENTAL S charset[i++] = c++; c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER while (c < 0x191d) // ..to LIMBU LETTER HA charset[i++] = c++; c = 0x1920; // from LIMBU VOWEL SIGN A while (c < 0x192c) // ..to LIMBU SUBJOINED LETTER WA charset[i++] = c++; c = 0x1930; // from LIMBU SMALL LETTER KA while (c < 0x193c) // ..to LIMBU SIGN SA-I charset[i++] = c++; c = 0x1944; // from LIMBU EXCLAMATION MARK while (c < 0x196e) // ..to TAI LE LETTER AI charset[i++] = c++; c = 0x1970; // from TAI LE LETTER TONE-2 while (c < 0x1975) // ..to TAI LE LETTER TONE-6 charset[i++] = c++; c = 0x1980; // from NEW TAI LUE LETTER HIGH QA while (c < 0x19ac) // ..to NEW TAI LUE LETTER LOW SUA charset[i++] = c++; c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER while (c < 0x19ca) // ..to NEW TAI LUE TONE MARK-2 charset[i++] = c++; c = 0x19d0; // from NEW TAI LUE DIGIT ZERO while (c < 0x19db) // ..to NEW TAI LUE THAM DIGIT ONE charset[i++] = c++; c = 0x19de; // from NEW TAI LUE SIGN LAE while (c < 0x1a1c) // ..to BUGINESE VOWEL SIGN AE charset[i++] = c++; c = 0x1a1e; // from BUGINESE PALLAWA while (c < 0x1a5f) // ..to TAI THAM CONSONANT SIGN SA charset[i++] = c++; c = 0x1a60; // from TAI THAM SIGN SAKOT while (c < 0x1a7d) // ..to TAI THAM SIGN KHUEN-LUE KARAN charset[i++] = c++; c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT while (c < 0x1a8a) // ..to TAI THAM HORA DIGIT NINE charset[i++] = c++; c = 0x1a90; // from TAI THAM THAM DIGIT ZERO while (c < 0x1a9a) // ..to TAI THAM THAM DIGIT NINE charset[i++] = c++; c = 0x1aa0; // from TAI THAM SIGN WIANG while (c < 0x1aae) // ..to TAI THAM SIGN CAANG charset[i++] = c++; c = 0x1b00; // from BALINESE SIGN ULU RICEM while (c < 0x1b4c) // ..to BALINESE LETTER ASYURA SASAK charset[i++] = c++; c = 0x1b50; // from BALINESE DIGIT ZERO while (c < 0x1b7d) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING charset[i++] = c++; c = 0x1b80; // from SUNDANESE SIGN PANYECEK while (c < 0x1bab) // ..to SUNDANESE SIGN PAMAAEH charset[i++] = c++; c = 0x1bae; // from SUNDANESE LETTER KHA while (c < 0x1bba) // ..to SUNDANESE DIGIT NINE charset[i++] = c++; c = 0x1bc0; // from BATAK LETTER A while (c < 0x1bf4) // ..to BATAK PANONGONAN charset[i++] = c++; c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK while (c < 0x1c38) // ..to LEPCHA SIGN NUKTA charset[i++] = c++; c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL while (c < 0x1c4a) // ..to LEPCHA DIGIT NINE charset[i++] = c++; c = 0x1c4d; // from LEPCHA LETTER TTA while (c < 0x1c80) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD charset[i++] = c++; c = 0x1cd0; // from VEDIC TONE KARSHANA while (c < 0x1cf3) // ..to VEDIC SIGN ARDHAVISARGA charset[i++] = c++; c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A while (c < 0x1de7) // ..to COMBINING LATIN SMALL LETTER Z charset[i++] = c++; c = 0x1dfc; // from COMBINING DOUBLE INVERTED BREVE BELOW while (c < 0x1f16) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA charset[i++] = c++; c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI while (c < 0x1f1e) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA charset[i++] = c++; c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI while (c < 0x1f46) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA charset[i++] = c++; c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI while (c < 0x1f4e) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA charset[i++] = c++; c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI while (c < 0x1f58) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI charset[i++] = c++; c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI while (c < 0x1f7e) // ..to GREEK SMALL LETTER OMEGA WITH OXIA charset[i++] = c++; c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI while (c < 0x1fb5) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI charset[i++] = c++; c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI while (c < 0x1fc5) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI charset[i++] = c++; c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI while (c < 0x1fd4) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA charset[i++] = c++; c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI while (c < 0x1fdc) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA charset[i++] = c++; c = 0x1fdd; // from GREEK DASIA AND VARIA while (c < 0x1ff0) // ..to GREEK VARIA charset[i++] = c++; charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI while (c < 0x1fff) // ..to GREEK DASIA charset[i++] = c++; c = 0x2000; // from EN QUAD while (c < 0x2065) // ..to INVISIBLE PLUS charset[i++] = c++; c = 0x206a; // from INHIBIT SYMMETRIC SWAPPING while (c < 0x2072) // ..to SUPERSCRIPT LATIN SMALL LETTER I charset[i++] = c++; c = 0x2074; // from SUPERSCRIPT FOUR while (c < 0x208f) // ..to SUBSCRIPT RIGHT PARENTHESIS charset[i++] = c++; c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A while (c < 0x209d) // ..to LATIN SUBSCRIPT SMALL LETTER T charset[i++] = c++; c = 0x20a0; // from EURO-CURRENCY SIGN while (c < 0x20ba) // ..to INDIAN RUPEE SIGN charset[i++] = c++; c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE while (c < 0x20f1) // ..to COMBINING ASTERISK ABOVE charset[i++] = c++; c = 0x2100; // from ACCOUNT OF while (c < 0x218a) // ..to VULGAR FRACTION ZERO THIRDS charset[i++] = c++; c = 0x2190; // from LEFTWARDS ARROW while (c < 0x23f4) // ..to HOURGLASS WITH FLOWING SAND charset[i++] = c++; c = 0x2400; // from SYMBOL FOR NULL while (c < 0x2427) // ..to SYMBOL FOR SUBSTITUTE FORM TWO charset[i++] = c++; c = 0x2440; // from OCR HOOK while (c < 0x244b) // ..to OCR DOUBLE BACKSLASH charset[i++] = c++; c = 0x2460; // from CIRCLED DIGIT ONE while (c < 0x2700) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE charset[i++] = c++; c = 0x2701; // from UPPER BLADE SCISSORS while (c < 0x27cb) // ..to VERTICAL BAR WITH HORIZONTAL STROKE charset[i++] = c++; c = 0x27ce; // from SQUARED LOGICAL AND while (c < 0x2b4d) // ..to RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR charset[i++] = c++; c = 0x2b50; // from WHITE MEDIUM STAR while (c < 0x2b5a) // ..to HEAVY CIRCLED SALTIRE charset[i++] = c++; c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU while (c < 0x2c2f) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE charset[i++] = c++; c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU while (c < 0x2c5f) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE charset[i++] = c++; c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR while (c < 0x2cf2) // ..to COPTIC COMBINING SPIRITUS LENIS charset[i++] = c++; c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP while (c < 0x2d26) // ..to GEORGIAN SMALL LETTER HOE charset[i++] = c++; c = 0x2d30; // from TIFINAGH LETTER YA while (c < 0x2d66) // ..to TIFINAGH LETTER YAZZ charset[i++] = c++; charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK c = 0x2d7f; // from TIFINAGH CONSONANT JOINER while (c < 0x2d97) // ..to ETHIOPIC SYLLABLE GGWE charset[i++] = c++; c = 0x2da0; // from ETHIOPIC SYLLABLE SSA while (c < 0x2da7) // ..to ETHIOPIC SYLLABLE SSO charset[i++] = c++; c = 0x2da8; // from ETHIOPIC SYLLABLE CCA while (c < 0x2daf) // ..to ETHIOPIC SYLLABLE CCO charset[i++] = c++; c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA while (c < 0x2db7) // ..to ETHIOPIC SYLLABLE ZZO charset[i++] = c++; c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA while (c < 0x2dbf) // ..to ETHIOPIC SYLLABLE CCHO charset[i++] = c++; c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA while (c < 0x2dc7) // ..to ETHIOPIC SYLLABLE QYO charset[i++] = c++; c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA while (c < 0x2dcf) // ..to ETHIOPIC SYLLABLE KYO charset[i++] = c++; c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA while (c < 0x2dd7) // ..to ETHIOPIC SYLLABLE XYO charset[i++] = c++; c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA while (c < 0x2ddf) // ..to ETHIOPIC SYLLABLE GYO charset[i++] = c++; c = 0x2de0; // from COMBINING CYRILLIC LETTER BE while (c < 0x2e32) // ..to WORD SEPARATOR MIDDLE DOT charset[i++] = c++; c = 0x2e80; // from CJK RADICAL REPEAT while (c < 0x2e9a) // ..to CJK RADICAL RAP charset[i++] = c++; c = 0x2e9b; // from CJK RADICAL CHOKE while (c < 0x2ef4) // ..to CJK RADICAL C-SIMPLIFIED TURTLE charset[i++] = c++; c = 0x2f00; // from KANGXI RADICAL ONE while (c < 0x2fd6) // ..to KANGXI RADICAL FLUTE charset[i++] = c++; c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT while (c < 0x2ffc) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID charset[i++] = c++; c = 0x3000; // from IDEOGRAPHIC SPACE while (c < 0x3040) // ..to IDEOGRAPHIC HALF FILL SPACE charset[i++] = c++; c = 0x3041; // from HIRAGANA LETTER SMALL A while (c < 0x3097) // ..to HIRAGANA LETTER SMALL KE charset[i++] = c++; c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK while (c < 0x3100) // ..to KATAKANA DIGRAPH KOTO charset[i++] = c++; c = 0x3105; // from BOPOMOFO LETTER B while (c < 0x312e) // ..to BOPOMOFO LETTER IH charset[i++] = c++; c = 0x3131; // from HANGUL LETTER KIYEOK while (c < 0x318f) // ..to HANGUL LETTER ARAEAE charset[i++] = c++; c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK while (c < 0x31bb) // ..to BOPOMOFO LETTER ZY charset[i++] = c++; c = 0x31c0; // from CJK STROKE T while (c < 0x31e4) // ..to CJK STROKE Q charset[i++] = c++; c = 0x31f0; // from KATAKANA LETTER SMALL KU while (c < 0x321f) // ..to PARENTHESIZED KOREAN CHARACTER O HU charset[i++] = c++; c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE while (c < 0x32ff) // ..to CIRCLED KATAKANA WO charset[i++] = c++; c = 0x3300; // from SQUARE APAATO while (c < 0x3400) // ..to SQUARE GAL charset[i++] = c++; c = 0x3400; // from while (c < 0x4db6) // ..to charset[i++] = c++; c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN while (c < 0x4e00) // ..to HEXAGRAM FOR BEFORE COMPLETION charset[i++] = c++; c = 0x4e00; // from while (c < 0x9fcc) // ..to charset[i++] = c++; c = 0xa000; // from YI SYLLABLE IT while (c < 0xa48d) // ..to YI SYLLABLE YYR charset[i++] = c++; c = 0xa490; // from YI RADICAL QOT while (c < 0xa4c7) // ..to YI RADICAL KE charset[i++] = c++; c = 0xa4d0; // from LISU LETTER BA while (c < 0xa62c) // ..to VAI SYLLABLE NDOLE DO charset[i++] = c++; c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA while (c < 0xa674) // ..to SLAVONIC ASTERISK charset[i++] = c++; c = 0xa67c; // from COMBINING CYRILLIC KAVYKA while (c < 0xa698) // ..to CYRILLIC SMALL LETTER SHWE charset[i++] = c++; c = 0xa6a0; // from BAMUM LETTER A while (c < 0xa6f8) // ..to BAMUM QUESTION MARK charset[i++] = c++; c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING while (c < 0xa78f) // ..to LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT charset[i++] = c++; charset[i++] = 0xa790; // LATIN CAPITAL LETTER N WITH DESCENDER charset[i++] = 0xa791; // LATIN SMALL LETTER N WITH DESCENDER c = 0xa7a0; // from LATIN CAPITAL LETTER G WITH OBLIQUE STROKE while (c < 0xa7aa) // ..to LATIN SMALL LETTER S WITH OBLIQUE STROKE charset[i++] = c++; c = 0xa7fa; // from LATIN LETTER SMALL CAPITAL TURNED M while (c < 0xa82c) // ..to SYLOTI NAGRI POETRY MARK-4 charset[i++] = c++; c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER while (c < 0xa83a) // ..to NORTH INDIC QUANTITY MARK charset[i++] = c++; c = 0xa840; // from PHAGS-PA LETTER KA while (c < 0xa878) // ..to PHAGS-PA MARK DOUBLE SHAD charset[i++] = c++; c = 0xa880; // from SAURASHTRA SIGN ANUSVARA while (c < 0xa8c5) // ..to SAURASHTRA SIGN VIRAMA charset[i++] = c++; c = 0xa8ce; // from SAURASHTRA DANDA while (c < 0xa8da) // ..to SAURASHTRA DIGIT NINE charset[i++] = c++; c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO while (c < 0xa8fc) // ..to DEVANAGARI HEADSTROKE charset[i++] = c++; c = 0xa900; // from KAYAH LI DIGIT ZERO while (c < 0xa954) // ..to REJANG VIRAMA charset[i++] = c++; c = 0xa95f; // from REJANG SECTION MARK while (c < 0xa97d) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH charset[i++] = c++; c = 0xa980; // from JAVANESE SIGN PANYANGGA while (c < 0xa9ce) // ..to JAVANESE TURNED PADA PISELEH charset[i++] = c++; c = 0xa9cf; // from JAVANESE PANGRANGKEP while (c < 0xa9da) // ..to JAVANESE DIGIT NINE charset[i++] = c++; charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN c = 0xaa00; // from CHAM LETTER A while (c < 0xaa37) // ..to CHAM CONSONANT SIGN WA charset[i++] = c++; c = 0xaa40; // from CHAM LETTER FINAL K while (c < 0xaa4e) // ..to CHAM CONSONANT SIGN FINAL H charset[i++] = c++; c = 0xaa50; // from CHAM DIGIT ZERO while (c < 0xaa5a) // ..to CHAM DIGIT NINE charset[i++] = c++; c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL while (c < 0xaa7c) // ..to MYANMAR SIGN PAO KAREN TONE charset[i++] = c++; c = 0xaa80; // from TAI VIET LETTER LOW KO while (c < 0xaac3) // ..to TAI VIET TONE MAI SONG charset[i++] = c++; c = 0xaadb; // from TAI VIET SYMBOL KON while (c < 0xaae0) // ..to TAI VIET SYMBOL KOI KOI charset[i++] = c++; c = 0xab01; // from ETHIOPIC SYLLABLE TTHU while (c < 0xab07) // ..to ETHIOPIC SYLLABLE TTHO charset[i++] = c++; c = 0xab09; // from ETHIOPIC SYLLABLE DDHU while (c < 0xab0f) // ..to ETHIOPIC SYLLABLE DDHO charset[i++] = c++; c = 0xab11; // from ETHIOPIC SYLLABLE DZU while (c < 0xab17) // ..to ETHIOPIC SYLLABLE DZO charset[i++] = c++; c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA while (c < 0xab27) // ..to ETHIOPIC SYLLABLE CCHHO charset[i++] = c++; c = 0xab28; // from ETHIOPIC SYLLABLE BBA while (c < 0xab2f) // ..to ETHIOPIC SYLLABLE BBO charset[i++] = c++; c = 0xabc0; // from MEETEI MAYEK LETTER KOK while (c < 0xabee) // ..to MEETEI MAYEK APUN IYEK charset[i++] = c++; c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO while (c < 0xabfa) // ..to MEETEI MAYEK DIGIT NINE charset[i++] = c++; c = 0xac00; // from while (c < 0xd7a4) // ..to charset[i++] = c++; c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO while (c < 0xd7c7) // ..to HANGUL JUNGSEONG ARAEA-E charset[i++] = c++; c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL while (c < 0xd7fc) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH charset[i++] = c++; c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900 while (c < 0xfa2e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA2D charset[i++] = c++; c = 0xfa30; // from CJK COMPATIBILITY IDEOGRAPH-FA30 while (c < 0xfa6e) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D charset[i++] = c++; c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70 while (c < 0xfada) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9 charset[i++] = c++; c = 0xfb00; // from LATIN SMALL LIGATURE FF while (c < 0xfb07) // ..to LATIN SMALL LIGATURE ST charset[i++] = c++; c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW while (c < 0xfb18) // ..to ARMENIAN SMALL LIGATURE MEN XEH charset[i++] = c++; c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ while (c < 0xfb37) // ..to HEBREW LETTER ZAYIN WITH DAGESH charset[i++] = c++; c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH while (c < 0xfb3d) // ..to HEBREW LETTER LAMED WITH DAGESH charset[i++] = c++; charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH while (c < 0xfbc2) // ..to ARABIC SYMBOL SMALL TAH BELOW charset[i++] = c++; c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM while (c < 0xfd40) // ..to ORNATE RIGHT PARENTHESIS charset[i++] = c++; c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM while (c < 0xfd90) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM charset[i++] = c++; c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM while (c < 0xfdc8) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM charset[i++] = c++; c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM while (c < 0xfdfe) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM charset[i++] = c++; c = 0xfe00; // from VARIATION SELECTOR-1 while (c < 0xfe1a) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS charset[i++] = c++; c = 0xfe20; // from COMBINING LIGATURE LEFT HALF while (c < 0xfe27) // ..to COMBINING CONJOINING MACRON charset[i++] = c++; c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER while (c < 0xfe53) // ..to SMALL FULL STOP charset[i++] = c++; c = 0xfe54; // from SMALL SEMICOLON while (c < 0xfe67) // ..to SMALL EQUALS SIGN charset[i++] = c++; c = 0xfe68; // from SMALL REVERSE SOLIDUS while (c < 0xfe6c) // ..to SMALL COMMERCIAL AT charset[i++] = c++; c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM while (c < 0xfe75) // ..to ARABIC KASRATAN ISOLATED FORM charset[i++] = c++; c = 0xfe76; // from ARABIC FATHA ISOLATED FORM while (c < 0xfefd) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM charset[i++] = c++; c = 0xff01; // from FULLWIDTH EXCLAMATION MARK while (c < 0xffbf) // ..to HALFWIDTH HANGUL LETTER HIEUH charset[i++] = c++; c = 0xffc2; // from HALFWIDTH HANGUL LETTER A while (c < 0xffc8) // ..to HALFWIDTH HANGUL LETTER E charset[i++] = c++; c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO while (c < 0xffd0) // ..to HALFWIDTH HANGUL LETTER OE charset[i++] = c++; c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO while (c < 0xffd8) // ..to HALFWIDTH HANGUL LETTER YU charset[i++] = c++; charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I c = 0xffe0; // from FULLWIDTH CENT SIGN while (c < 0xffe7) // ..to FULLWIDTH WON SIGN charset[i++] = c++; c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL while (c < 0xffef) // ..to HALFWIDTH WHITE CIRCLE charset[i++] = c++; c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR while (c < 0xfffe) // ..to REPLACEMENT CHARACTER charset[i++] = c++; c = 0x10000; // from LINEAR B SYLLABLE B008 A while (c < 0x1000c) // ..to LINEAR B SYLLABLE B046 JE charset[i++] = c++; c = 0x1000d; // from LINEAR B SYLLABLE B036 JO while (c < 0x10027) // ..to LINEAR B SYLLABLE B032 QO charset[i++] = c++; c = 0x10028; // from LINEAR B SYLLABLE B060 RA while (c < 0x1003b) // ..to LINEAR B SYLLABLE B042 WO charset[i++] = c++; charset[i++] = 0x1003c; // LINEAR B SYLLABLE B017 ZA charset[i++] = 0x1003d; // LINEAR B SYLLABLE B074 ZE c = 0x1003f; // from LINEAR B SYLLABLE B020 ZO while (c < 0x1004e) // ..to LINEAR B SYLLABLE B091 TWO charset[i++] = c++; c = 0x10050; // from LINEAR B SYMBOL B018 while (c < 0x1005e) // ..to LINEAR B SYMBOL B089 charset[i++] = c++; c = 0x10080; // from LINEAR B IDEOGRAM B100 MAN while (c < 0x100fb) // ..to LINEAR B IDEOGRAM VESSEL B305 charset[i++] = c++; charset[i++] = 0x10100; // AEGEAN WORD SEPARATOR LINE charset[i++] = 0x10102; // AEGEAN CHECK MARK c = 0x10107; // from AEGEAN NUMBER ONE while (c < 0x10134) // ..to AEGEAN NUMBER NINETY THOUSAND charset[i++] = c++; c = 0x10137; // from AEGEAN WEIGHT BASE UNIT while (c < 0x1018b) // ..to GREEK ZERO SIGN charset[i++] = c++; c = 0x10190; // from ROMAN SEXTANS SIGN while (c < 0x1019c) // ..to ROMAN CENTURIAL SIGN charset[i++] = c++; c = 0x101d0; // from PHAISTOS DISC SIGN PEDESTRIAN while (c < 0x101fe) // ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE charset[i++] = c++; c = 0x10280; // from LYCIAN LETTER A while (c < 0x1029d) // ..to LYCIAN LETTER X charset[i++] = c++; c = 0x102a0; // from CARIAN LETTER A while (c < 0x102d1) // ..to CARIAN LETTER UUU3 charset[i++] = c++; c = 0x10300; // from OLD ITALIC LETTER A while (c < 0x1031f) // ..to OLD ITALIC LETTER UU charset[i++] = c++; c = 0x10320; // from OLD ITALIC NUMERAL ONE while (c < 0x10324) // ..to OLD ITALIC NUMERAL FIFTY charset[i++] = c++; c = 0x10330; // from GOTHIC LETTER AHSA while (c < 0x1034b) // ..to GOTHIC LETTER NINE HUNDRED charset[i++] = c++; c = 0x10380; // from UGARITIC LETTER ALPA while (c < 0x1039e) // ..to UGARITIC LETTER SSU charset[i++] = c++; c = 0x1039f; // from UGARITIC WORD DIVIDER while (c < 0x103c4) // ..to OLD PERSIAN SIGN HA charset[i++] = c++; c = 0x103c8; // from OLD PERSIAN SIGN AURAMAZDAA while (c < 0x103d6) // ..to OLD PERSIAN NUMBER HUNDRED charset[i++] = c++; c = 0x10400; // from DESERET CAPITAL LETTER LONG I while (c < 0x1049e) // ..to OSMANYA LETTER OO charset[i++] = c++; c = 0x104a0; // from OSMANYA DIGIT ZERO while (c < 0x104aa) // ..to OSMANYA DIGIT NINE charset[i++] = c++; c = 0x10800; // from CYPRIOT SYLLABLE A while (c < 0x10806) // ..to CYPRIOT SYLLABLE JA charset[i++] = c++; c = 0x1080a; // from CYPRIOT SYLLABLE KA while (c < 0x10836) // ..to CYPRIOT SYLLABLE WO charset[i++] = c++; charset[i++] = 0x10837; // CYPRIOT SYLLABLE XA charset[i++] = 0x10838; // CYPRIOT SYLLABLE XE c = 0x1083f; // from CYPRIOT SYLLABLE ZO while (c < 0x10856) // ..to IMPERIAL ARAMAIC LETTER TAW charset[i++] = c++; c = 0x10857; // from IMPERIAL ARAMAIC SECTION SIGN while (c < 0x10860) // ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND charset[i++] = c++; c = 0x10900; // from PHOENICIAN LETTER ALF while (c < 0x1091c) // ..to PHOENICIAN NUMBER THREE charset[i++] = c++; c = 0x1091f; // from PHOENICIAN WORD SEPARATOR while (c < 0x1093a) // ..to LYDIAN LETTER C charset[i++] = c++; c = 0x10a00; // from KHAROSHTHI LETTER A while (c < 0x10a04) // ..to KHAROSHTHI VOWEL SIGN VOCALIC R charset[i++] = c++; charset[i++] = 0x10a05; // KHAROSHTHI VOWEL SIGN E charset[i++] = 0x10a06; // KHAROSHTHI VOWEL SIGN O c = 0x10a0c; // from KHAROSHTHI VOWEL LENGTH MARK while (c < 0x10a14) // ..to KHAROSHTHI LETTER GHA charset[i++] = c++; charset[i++] = 0x10a15; // KHAROSHTHI LETTER CA charset[i++] = 0x10a17; // KHAROSHTHI LETTER JA c = 0x10a19; // from KHAROSHTHI LETTER NYA while (c < 0x10a34) // ..to KHAROSHTHI LETTER TTTHA charset[i++] = c++; charset[i++] = 0x10a38; // KHAROSHTHI SIGN BAR ABOVE charset[i++] = 0x10a3a; // KHAROSHTHI SIGN DOT BELOW c = 0x10a3f; // from KHAROSHTHI VIRAMA while (c < 0x10a48) // ..to KHAROSHTHI NUMBER ONE THOUSAND charset[i++] = c++; c = 0x10a50; // from KHAROSHTHI PUNCTUATION DOT while (c < 0x10a59) // ..to KHAROSHTHI PUNCTUATION LINES charset[i++] = c++; c = 0x10a60; // from OLD SOUTH ARABIAN LETTER HE while (c < 0x10a80) // ..to OLD SOUTH ARABIAN NUMERIC INDICATOR charset[i++] = c++; c = 0x10b00; // from AVESTAN LETTER A while (c < 0x10b36) // ..to AVESTAN LETTER HE charset[i++] = c++; c = 0x10b39; // from AVESTAN ABBREVIATION MARK while (c < 0x10b56) // ..to INSCRIPTIONAL PARTHIAN LETTER TAW charset[i++] = c++; c = 0x10b58; // from INSCRIPTIONAL PARTHIAN NUMBER ONE while (c < 0x10b73) // ..to INSCRIPTIONAL PAHLAVI LETTER TAW charset[i++] = c++; c = 0x10b78; // from INSCRIPTIONAL PAHLAVI NUMBER ONE while (c < 0x10b80) // ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND charset[i++] = c++; c = 0x10c00; // from OLD TURKIC LETTER ORKHON A while (c < 0x10c49) // ..to OLD TURKIC LETTER ORKHON BASH charset[i++] = c++; c = 0x10e60; // from RUMI DIGIT ONE while (c < 0x10e7f) // ..to RUMI FRACTION TWO THIRDS charset[i++] = c++; c = 0x11000; // from BRAHMI SIGN CANDRABINDU while (c < 0x1104e) // ..to BRAHMI PUNCTUATION LOTUS charset[i++] = c++; c = 0x11052; // from BRAHMI NUMBER ONE while (c < 0x11070) // ..to BRAHMI DIGIT NINE charset[i++] = c++; c = 0x11080; // from KAITHI SIGN CANDRABINDU while (c < 0x110c2) // ..to KAITHI DOUBLE DANDA charset[i++] = c++; c = 0x12000; // from CUNEIFORM SIGN A while (c < 0x1236f) // ..to CUNEIFORM SIGN ZUM charset[i++] = c++; c = 0x12400; // from CUNEIFORM NUMERIC SIGN TWO ASH while (c < 0x12463) // ..to CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER charset[i++] = c++; c = 0x12470; // from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER while (c < 0x12474) // ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON charset[i++] = c++; c = 0x13000; // from EGYPTIAN HIEROGLYPH A001 while (c < 0x1342f) // ..to EGYPTIAN HIEROGLYPH AA032 charset[i++] = c++; c = 0x16800; // from BAMUM LETTER PHASE-A NGKUE MFON while (c < 0x16a39) // ..to BAMUM LETTER PHASE-F VUEQ charset[i++] = c++; charset[i++] = 0x1b000; // KATAKANA LETTER ARCHAIC E charset[i++] = 0x1b001; // HIRAGANA LETTER ARCHAIC YE c = 0x1d000; // from BYZANTINE MUSICAL SYMBOL PSILI while (c < 0x1d0f6) // ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO charset[i++] = c++; c = 0x1d100; // from MUSICAL SYMBOL SINGLE BARLINE while (c < 0x1d127) // ..to MUSICAL SYMBOL DRUM CLEF-2 charset[i++] = c++; c = 0x1d129; // from MUSICAL SYMBOL MULTIPLE MEASURE REST while (c < 0x1d1de) // ..to MUSICAL SYMBOL PES SUBPUNCTIS charset[i++] = c++; c = 0x1d200; // from GREEK VOCAL NOTATION SYMBOL-1 while (c < 0x1d246) // ..to GREEK MUSICAL LEIMMA charset[i++] = c++; c = 0x1d300; // from MONOGRAM FOR EARTH while (c < 0x1d357) // ..to TETRAGRAM FOR FOSTERING charset[i++] = c++; c = 0x1d360; // from COUNTING ROD UNIT DIGIT ONE while (c < 0x1d372) // ..to COUNTING ROD TENS DIGIT NINE charset[i++] = c++; c = 0x1d400; // from MATHEMATICAL BOLD CAPITAL A while (c < 0x1d455) // ..to MATHEMATICAL ITALIC SMALL G charset[i++] = c++; c = 0x1d456; // from MATHEMATICAL ITALIC SMALL I while (c < 0x1d49d) // ..to MATHEMATICAL SCRIPT CAPITAL A charset[i++] = c++; charset[i++] = 0x1d49e; // MATHEMATICAL SCRIPT CAPITAL C charset[i++] = 0x1d49f; // MATHEMATICAL SCRIPT CAPITAL D charset[i++] = 0x1d4a5; // MATHEMATICAL SCRIPT CAPITAL J charset[i++] = 0x1d4a6; // MATHEMATICAL SCRIPT CAPITAL K c = 0x1d4a9; // from MATHEMATICAL SCRIPT CAPITAL N while (c < 0x1d4ad) // ..to MATHEMATICAL SCRIPT CAPITAL Q charset[i++] = c++; c = 0x1d4ae; // from MATHEMATICAL SCRIPT CAPITAL S while (c < 0x1d4ba) // ..to MATHEMATICAL SCRIPT SMALL D charset[i++] = c++; c = 0x1d4bd; // from MATHEMATICAL SCRIPT SMALL H while (c < 0x1d4c4) // ..to MATHEMATICAL SCRIPT SMALL N charset[i++] = c++; c = 0x1d4c5; // from MATHEMATICAL SCRIPT SMALL P while (c < 0x1d506) // ..to MATHEMATICAL FRAKTUR CAPITAL B charset[i++] = c++; c = 0x1d507; // from MATHEMATICAL FRAKTUR CAPITAL D while (c < 0x1d50b) // ..to MATHEMATICAL FRAKTUR CAPITAL G charset[i++] = c++; c = 0x1d50d; // from MATHEMATICAL FRAKTUR CAPITAL J while (c < 0x1d515) // ..to MATHEMATICAL FRAKTUR CAPITAL Q charset[i++] = c++; c = 0x1d516; // from MATHEMATICAL FRAKTUR CAPITAL S while (c < 0x1d51d) // ..to MATHEMATICAL FRAKTUR CAPITAL Y charset[i++] = c++; c = 0x1d51e; // from MATHEMATICAL FRAKTUR SMALL A while (c < 0x1d53a) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B charset[i++] = c++; c = 0x1d53b; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL D while (c < 0x1d53f) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G charset[i++] = c++; c = 0x1d540; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL I while (c < 0x1d545) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M charset[i++] = c++; c = 0x1d54a; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL S while (c < 0x1d551) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y charset[i++] = c++; c = 0x1d552; // from MATHEMATICAL DOUBLE-STRUCK SMALL A while (c < 0x1d6a6) // ..to MATHEMATICAL ITALIC SMALL DOTLESS J charset[i++] = c++; c = 0x1d6a8; // from MATHEMATICAL BOLD CAPITAL ALPHA while (c < 0x1d7cc) // ..to MATHEMATICAL BOLD SMALL DIGAMMA charset[i++] = c++; c = 0x1d7ce; // from MATHEMATICAL BOLD DIGIT ZERO while (c < 0x1d800) // ..to MATHEMATICAL MONOSPACE DIGIT NINE charset[i++] = c++; c = 0x1f000; // from MAHJONG TILE EAST WIND while (c < 0x1f02c) // ..to MAHJONG TILE BACK charset[i++] = c++; c = 0x1f030; // from DOMINO TILE HORIZONTAL BACK while (c < 0x1f094) // ..to DOMINO TILE VERTICAL-06-06 charset[i++] = c++; c = 0x1f0a0; // from PLAYING CARD BACK while (c < 0x1f0af) // ..to PLAYING CARD KING OF SPADES charset[i++] = c++; c = 0x1f0b1; // from PLAYING CARD ACE OF HEARTS while (c < 0x1f0bf) // ..to PLAYING CARD KING OF HEARTS charset[i++] = c++; c = 0x1f0c1; // from PLAYING CARD ACE OF DIAMONDS while (c < 0x1f0d0) // ..to PLAYING CARD BLACK JOKER charset[i++] = c++; c = 0x1f0d1; // from PLAYING CARD ACE OF CLUBS while (c < 0x1f0e0) // ..to PLAYING CARD WHITE JOKER charset[i++] = c++; c = 0x1f100; // from DIGIT ZERO FULL STOP while (c < 0x1f10b) // ..to DIGIT NINE COMMA charset[i++] = c++; c = 0x1f110; // from PARENTHESIZED LATIN CAPITAL LETTER A while (c < 0x1f12f) // ..to CIRCLED WZ charset[i++] = c++; c = 0x1f130; // from SQUARED LATIN CAPITAL LETTER A while (c < 0x1f16a) // ..to NEGATIVE CIRCLED LATIN CAPITAL LETTER Z charset[i++] = c++; c = 0x1f170; // from NEGATIVE SQUARED LATIN CAPITAL LETTER A while (c < 0x1f19b) // ..to SQUARED VS charset[i++] = c++; c = 0x1f1e6; // from REGIONAL INDICATOR SYMBOL LETTER A while (c < 0x1f203) // ..to SQUARED KATAKANA SA charset[i++] = c++; c = 0x1f210; // from SQUARED CJK UNIFIED IDEOGRAPH-624B while (c < 0x1f23b) // ..to SQUARED CJK UNIFIED IDEOGRAPH-55B6 charset[i++] = c++; c = 0x1f240; // from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C while (c < 0x1f249) // ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 charset[i++] = c++; charset[i++] = 0x1f250; // CIRCLED IDEOGRAPH ADVANTAGE charset[i++] = 0x1f251; // CIRCLED IDEOGRAPH ACCEPT c = 0x1f300; // from CYCLONE while (c < 0x1f321) // ..to SHOOTING STAR charset[i++] = c++; c = 0x1f330; // from CHESTNUT while (c < 0x1f336) // ..to CACTUS charset[i++] = c++; c = 0x1f337; // from TULIP while (c < 0x1f37d) // ..to BABY BOTTLE charset[i++] = c++; c = 0x1f380; // from RIBBON while (c < 0x1f394) // ..to GRADUATION CAP charset[i++] = c++; c = 0x1f3a0; // from CAROUSEL HORSE while (c < 0x1f3c5) // ..to SURFER charset[i++] = c++; c = 0x1f3c6; // from TROPHY while (c < 0x1f3cb) // ..to SWIMMER charset[i++] = c++; c = 0x1f3e0; // from HOUSE BUILDING while (c < 0x1f3f1) // ..to EUROPEAN CASTLE charset[i++] = c++; c = 0x1f400; // from RAT while (c < 0x1f43f) // ..to PAW PRINTS charset[i++] = c++; c = 0x1f442; // from EAR while (c < 0x1f4f8) // ..to CAMERA charset[i++] = c++; c = 0x1f4f9; // from VIDEO CAMERA while (c < 0x1f4fd) // ..to VIDEOCASSETTE charset[i++] = c++; c = 0x1f500; // from TWISTED RIGHTWARDS ARROWS while (c < 0x1f53e) // ..to DOWN-POINTING SMALL RED TRIANGLE charset[i++] = c++; c = 0x1f550; // from CLOCK FACE ONE OCLOCK while (c < 0x1f568) // ..to CLOCK FACE TWELVE-THIRTY charset[i++] = c++; c = 0x1f5fb; // from MOUNT FUJI while (c < 0x1f600) // ..to MOYAI charset[i++] = c++; c = 0x1f601; // from GRINNING FACE WITH SMILING EYES while (c < 0x1f611) // ..to NEUTRAL FACE charset[i++] = c++; charset[i++] = 0x1f612; // UNAMUSED FACE charset[i++] = 0x1f614; // PENSIVE FACE charset[i++] = 0x1f61c; // FACE WITH STUCK-OUT TONGUE AND WINKING EYE charset[i++] = 0x1f61e; // DISAPPOINTED FACE c = 0x1f620; // from ANGRY FACE while (c < 0x1f626) // ..to DISAPPOINTED BUT RELIEVED FACE charset[i++] = c++; c = 0x1f628; // from FEARFUL FACE while (c < 0x1f62c) // ..to TIRED FACE charset[i++] = c++; c = 0x1f630; // from FACE WITH OPEN MOUTH AND COLD SWEAT while (c < 0x1f634) // ..to FLUSHED FACE charset[i++] = c++; c = 0x1f635; // from DIZZY FACE while (c < 0x1f641) // ..to WEARY CAT FACE charset[i++] = c++; c = 0x1f645; // from FACE WITH NO GOOD GESTURE while (c < 0x1f650) // ..to PERSON WITH FOLDED HANDS charset[i++] = c++; c = 0x1f680; // from ROCKET while (c < 0x1f6c6) // ..to LEFT LUGGAGE charset[i++] = c++; c = 0x1f700; // from ALCHEMICAL SYMBOL FOR QUINTESSENCE while (c < 0x1f774) // ..to ALCHEMICAL SYMBOL FOR HALF OUNCE charset[i++] = c++; c = 0x20000; // from while (c < 0x2a6d7) // ..to charset[i++] = c++; c = 0x2a700; // from while (c < 0x2b735) // ..to charset[i++] = c++; c = 0x2b740; // from while (c < 0x2b81e) // ..to charset[i++] = c++; c = 0x2f800; // from CJK COMPATIBILITY IDEOGRAPH-2F800 while (c < 0x2fa1e) // ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D charset[i++] = c++; c = 0xe0020; // from TAG SPACE while (c < 0xe0080) // ..to CANCEL TAG charset[i++] = c++; /* Zero-terminate it, and cache the first character */ charset[i] = 0; c0 = charset[0]; last = minlength - 1; i = 0; while (i <= last) { id[i] = 0; utf32[i++] = c0; } lastid = -1; utf32[i] = 0; /* We must init word with dummy data, it doesn't get set until filter() */ word = 1; } void generate() { int i; /* Handle the typical case specially */ if (utf32[last] = charset[++lastid]) return; lastid = 0; utf32[i = last] = c0; while (i--) { // Have a preceding position? if (utf32[i] = charset[++id[i]]) return; id[i] = 0; utf32[i] = c0; } if (++last < maxlength) { // Next length? id[last] = lastid = 0; utf32[last] = c0; utf32[last + 1] = 0; } else // We're done utf32 = 0; } void restore() { int i, o, c; /* Convert the restored word back from UTF-8 to UTF-32 */ i = o = 0; while (c = word[i]) { if (c >= 0xf0) { c = (c << 6) + word[++i]; c = (c << 6) + word[++i]; c = (c << 6) + word[++i]; c -= 0x3C82080; } else if (c >= 0xe0) { c = (c << 6) + word[++i]; c = (c << 6) + word[++i]; c -= 0xE2080; } else if (c >= 0xc0) { c = (c << 6) + word[++i]; c -= 0x3080; } i++; utf32[o++] = c; } utf32[o] = 0; /* Calculate the current length and infer the character indices */ last = 0; while (c = utf32[last]) { i = 0; while (charset[i] != c && charset[i]) i++; if (!charset[i]) i = 0; // Not found id[last++] = i; } lastid = id[--last]; } /* Convert from UTF-32 to UTF-8 */ void filter() { int i, c; i = -1; c = 0; while (utf32[++i]) { if (utf32[i] >= 0x10000) { word[c++] = 0xf0 | (utf32[i]>>18); word[c++] = 0x80 | (utf32[i]>>12 & 0x3f); word[c++] = 0x80 | (utf32[i]>>6 & 0x3f); word[c++] = 0x80 | (utf32[i] & 0x3f); } else if (utf32[i] >= 0x0800) { word[c++] = 0xe0 | (utf32[i]>>12); word[c++] = 0x80 | (utf32[i]>>6 & 0x3f); word[c++] = 0x80 | (utf32[i] & 0x3f); } else if (utf32[i] >= 0x80) { word[c++] = 0xc0 | (utf32[i]>>6); word[c++] = 0x80 | (utf32[i] & 0x3f); } else { word[c++] = utf32[i]; } } word[c] = 0; }