diff options
Diffstat (limited to 'src/chrtrans')
36 files changed, 988 insertions, 1686 deletions
diff --git a/src/chrtrans/Makefile.old b/src/chrtrans/Makefile.old index 0ca53fc2..42b49eec 100644 --- a/src/chrtrans/Makefile.old +++ b/src/chrtrans/Makefile.old @@ -30,6 +30,7 @@ TABLES= \ cp1257_uni.h \ cp437_uni.h \ cp737_uni.h \ + cp775_uni.h \ cp850_uni.h \ cp852_uni.h \ cp862_uni.h \ @@ -76,6 +77,7 @@ cp1256_uni.h: cp1256_uni.tbl makeuctb cp1257_uni.h: cp1257_uni.tbl makeuctb cp437_uni.h: cp437_uni.tbl makeuctb cp737_uni.h: cp737_uni.tbl makeuctb +cp775_uni.h: cp775_uni.tbl makeuctb cp850_uni.h: cp850_uni.tbl makeuctb cp852_uni.h: cp852_uni.tbl makeuctb cp862_uni.h: cp862_uni.tbl makeuctb diff --git a/src/chrtrans/README.format b/src/chrtrans/README.format index 4ced0a14..636afd32 100644 --- a/src/chrtrans/README.format +++ b/src/chrtrans/README.format @@ -26,7 +26,7 @@ b) directives: start with a keyword which may be abbreviated to one letter (first letter must be capitalized), followed by space and a value. Currently recognized: - + OptionName The name under which this should appear on the O)ptions screen in the list for Display Character Set @@ -53,7 +53,7 @@ c) character translation definitions: 0x41 U+0041 U+0391 ... and are used for "forward" translation (mapping this charset to Unicode) - AS WELL AS "back" translation (mapping Unicodes to an 8-bit + AS WELL AS "back" translation (mapping Unicodes to an 8-bit [incl. 7-bit ASCII] code). For the "forward" direction, only the first Unicode is used; for @@ -63,7 +63,7 @@ c) character translation definitions: The above example line would tell the chartrans mechanism: "For this charset, code position 65 [hex 0x41] contains Unicode U+0041 (LATIN CAPITAL LETTER A). For translation of Unicodes to - this charset, use byte value 65 [hex 0x41] for U+0041 (LATIN CAPITAL + this charset, use byte value 65 [hex 0x41] for U+0041 (LATIN CAPITAL LETTER A) as well as for U+0391 (GREEK CAPITAL LETTER ALPHA)." 
[Note that for bytes in the ASCII range 0x00-0x7F, the forward translations @@ -82,6 +82,10 @@ c) character translation definitions: * where <range> ::= <fontpos>-<fontpos> * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> + * + [Note that <fontpos> _without_ targets assumed notdefined, + so tables from ftp.unicode.org need no patching.] + d) string replacement definitions: @@ -89,10 +93,10 @@ d) string replacement definitions: U+00cd:I' - which would mean "Replace Unicode U+00cd (LATIN CAPITAL LETTER I WITH + which would mean "Replace Unicode U+00cd (LATIN CAPITAL LETTER I WITH ACUTE" with the string (consisting of two character) I' (if no other translation is available)." Please note that replacement definitions - in certnain charset table will override ones from Default table. + in certain charset table will override ones from the Default table. Note that everything after the ':' is currently taken VERBATIM, so careful with trailing blanks etc. @@ -111,7 +115,7 @@ d) string replacement definitions: Motivation: -- It is an extention of the format already in use for Linux (kernel, +- It is an extension of the format already in use for Linux (kernel, kbd package), those files can be used with some minimal editing. - It is easy to convert Unicode tables for other charsets, as they diff --git a/src/chrtrans/README.tables b/src/chrtrans/README.tables index be6dac6a..5993ccee 100644 --- a/src/chrtrans/README.tables +++ b/src/chrtrans/README.tables @@ -1,12 +1,13 @@ -The translation table files in this directory are _examples only_. -They were collected from several sources (among them ftp://ftp.unicode.org, -Linux kbd package, ftp://dkuug.dk/) and are believed to be correct -in their mappings, but not checked in detail. The Unicode/UCS2 values -for some of the RFC 1345 Mnemonic codes are out of date, a cleanup and -update would be needed for serious use. 
+The translation table files in this directory were collected from +several sources (among them ftp://ftp.unicode.org, Linux kbd package, +ftp://dkuug.dk/) and are believed to be correct in their mappings, +but not checked in detail. The Unicode/UCS2 values +for some of the RFC 1345 Mnemonic codes are out of date, +a cleanup and update would be needed for serious use. +[See also http://czyborra.com/charsets/iso8859.html for codepages survey.] More translation files can be easily provided (and new character entities -added to entities.h), this set is just to test whether the system works +added to entities.h), this set is just to test whether the system works in principle (and also how it behaves with incomplete data...) See the file README.format for a brief explanation of what's in the @@ -27,7 +28,7 @@ charset known to Lynx) you currently have to manually edit UCdomap.c, in two places: a) Near the top, you will find a bunch of lines (some may be commented out) - + #include "<fn>.h" Add or comment out as you wish. But it is probably safest to leave the @@ -44,7 +45,7 @@ did under a)...) [The <something> is derived from the charset's MIME name. if in doubt, check the last lines of the corresponding ...uni.h file.] c) To let make automatically notice when you have changed one of the - table files, and automatically regenerate the *uni.h file(s), + table files, and automatically regenerate the *uni.h file(s), you also have to add any new tables to both src/Makefile *and* src/chrtrans/Makefile. 
Or, for auto-config, the equivalent files named makefile.in before running ./configure, or makefile after running diff --git a/src/chrtrans/UCkd.h b/src/chrtrans/UCkd.h index 1e55716e..104882d7 100644 --- a/src/chrtrans/UCkd.h +++ b/src/chrtrans/UCkd.h @@ -34,7 +34,7 @@ struct unipair { }; struct unipair_str { u16 unicode; - char * replace_str; + CONST char * replace_str; }; struct unimapdesc { u16 entry_ct; diff --git a/src/chrtrans/cp1250_uni.tbl b/src/chrtrans/cp1250_uni.tbl index 41e8581d..49ba9008 100644 --- a/src/chrtrans/cp1250_uni.tbl +++ b/src/chrtrans/cp1250_uni.tbl @@ -2,280 +2,155 @@ Mwindows-1250 #Name as a Display Charset (used on Options screen) -OWinLatin2 (cp1250) +OEastern European (windows-1250) # -# Name: cp1250_WinLatin2 to Unicode table +# Name: cp1250 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1250_WinLatin2 code (in hex) +# Column #1 is the cp1250 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1250_WinLatin2 order +# The entries are in cp1250 order # -#0x00 U+0000 #NULL -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0A U+000A #LINE FEED -#0x0B U+000B #VERTICAL TABULATION -#0x0C U+000C #FORM FEED -#0x0D U+000D #CARRIAGE RETURN -#0x0E U+000E #SHIFT OUT -#0x0F U+000F #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 
U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1A U+001A #SUBSTITUTE -#0x1B U+001B #ESCAPE -#0x1C U+001C #FILE SEPARATOR -#0x1D U+001D #GROUP SEPARATOR -#0x1E U+001E #RECORD SEPARATOR -#0x1F U+001F #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND -0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2A U+002A #ASTERISK -0x2B U+002B #PLUS SIGN -0x2C U+002C #COMMA -0x2D U+002D #HYPHEN-MINUS -0x2E U+002E #FULL STOP -0x2F U+002F #SOLIDUS -0x30 U+0030 #DIGIT ZERO -0x31 U+0031 #DIGIT ONE -0x32 U+0032 #DIGIT TWO -0x33 U+0033 #DIGIT THREE -0x34 U+0034 #DIGIT FOUR -0x35 U+0035 #DIGIT FIVE -0x36 U+0036 #DIGIT SIX -0x37 U+0037 #DIGIT SEVEN -0x38 U+0038 #DIGIT EIGHT -0x39 U+0039 #DIGIT NINE -0x3A U+003A #COLON -0x3B U+003B #SEMICOLON -0x3C U+003C #LESS-THAN SIGN -0x3D U+003D #EQUALS SIGN -0x3E U+003E #GREATER-THAN SIGN -0x3F U+003F #QUESTION MARK -0x40 U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL LETTER A -0x42 U+0042 #LATIN CAPITAL LETTER B -0x43 U+0043 #LATIN CAPITAL LETTER C -0x44 U+0044 #LATIN CAPITAL LETTER D -0x45 U+0045 #LATIN CAPITAL LETTER E -0x46 U+0046 #LATIN CAPITAL LETTER F -0x47 U+0047 #LATIN CAPITAL LETTER G -0x48 U+0048 #LATIN CAPITAL LETTER H -0x49 U+0049 #LATIN CAPITAL LETTER I -0x4A U+004A #LATIN CAPITAL LETTER J -0x4B U+004B #LATIN CAPITAL LETTER K -0x4C U+004C #LATIN CAPITAL LETTER L -0x4D U+004D #LATIN CAPITAL LETTER M -0x4E U+004E #LATIN CAPITAL LETTER N -0x4F U+004F #LATIN CAPITAL LETTER O -0x50 U+0050 #LATIN CAPITAL LETTER P -0x51 U+0051 #LATIN CAPITAL LETTER Q -0x52 U+0052 #LATIN CAPITAL LETTER R -0x53 U+0053 #LATIN CAPITAL LETTER S -0x54 U+0054 #LATIN CAPITAL LETTER T -0x55 U+0055 #LATIN CAPITAL LETTER U -0x56 U+0056 #LATIN CAPITAL LETTER V -0x57 U+0057 
#LATIN CAPITAL LETTER W -0x58 U+0058 #LATIN CAPITAL LETTER X -0x59 U+0059 #LATIN CAPITAL LETTER Y -0x5A U+005A #LATIN CAPITAL LETTER Z -0x5B U+005B #LEFT SQUARE BRACKET -0x5C U+005C #REVERSE SOLIDUS -0x5D U+005D #RIGHT SQUARE BRACKET -0x5E U+005E #CIRCUMFLEX ACCENT -0x5F U+005F #LOW LINE -0x60 U+0060 #GRAVE ACCENT -0x61 U+0061 #LATIN SMALL LETTER A -0x62 U+0062 #LATIN SMALL LETTER B -0x63 U+0063 #LATIN SMALL LETTER C -0x64 U+0064 #LATIN SMALL LETTER D -0x65 U+0065 #LATIN SMALL LETTER E -0x66 U+0066 #LATIN SMALL LETTER F -0x67 U+0067 #LATIN SMALL LETTER G -0x68 U+0068 #LATIN SMALL LETTER H -0x69 U+0069 #LATIN SMALL LETTER I -0x6A U+006A #LATIN SMALL LETTER J -0x6B U+006B #LATIN SMALL LETTER K -0x6C U+006C #LATIN SMALL LETTER L -0x6D U+006D #LATIN SMALL LETTER M -0x6E U+006E #LATIN SMALL LETTER N -0x6F U+006F #LATIN SMALL LETTER O -0x70 U+0070 #LATIN SMALL LETTER P -0x71 U+0071 #LATIN SMALL LETTER Q -0x72 U+0072 #LATIN SMALL LETTER R -0x73 U+0073 #LATIN SMALL LETTER S -0x74 U+0074 #LATIN SMALL LETTER T -0x75 U+0075 #LATIN SMALL LETTER U -0x76 U+0076 #LATIN SMALL LETTER V -0x77 U+0077 #LATIN SMALL LETTER W -0x78 U+0078 #LATIN SMALL LETTER X -0x79 U+0079 #LATIN SMALL LETTER Y -0x7A U+007A #LATIN SMALL LETTER Z -0x7B U+007B #LEFT CURLY BRACKET -0x7C U+007C #VERTICAL LINE -0x7D U+007D #RIGHT CURLY BRACKET -0x7E U+007E #TILDE -#0x7F U+007F #DELETE -#0x80 #UNDEFINED -#0x81 #UNDEFINED -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -#0x83 #UNDEFINED -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -#0x88 #UNDEFINED -0x89 U+2030 #PER MILLE SIGN -0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+015A #LATIN CAPITAL LETTER S WITH ACUTE -0x8D U+0164 #LATIN CAPITAL LETTER T WITH CARON -0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON -0x8F U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE -#0x90 #UNDEFINED -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 
#RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -#0x98 #UNDEFINED -0x99 U+2122 #TRADE MARK SIGN -0x9A U+0161 #LATIN SMALL LETTER S WITH CARON -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+015B #LATIN SMALL LETTER S WITH ACUTE -0x9D U+0165 #LATIN SMALL LETTER T WITH CARON -0x9E U+017E #LATIN SMALL LETTER Z WITH CARON -0x9F U+017A #LATIN SMALL LETTER Z WITH ACUTE -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+02C7 #CARON -0xA2 U+02D8 #BREVE -0xA3 U+0141 #LATIN CAPITAL LETTER L WITH STROKE -0xA4 U+00A4 #CURRENCY SIGN -0xA5 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK -0xA6 U+00A6 #BROKEN BAR -0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS -0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+015E #LATIN CAPITAL LETTER S WITH CEDILLA -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -#0xAC #UNDEFINED -0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+02DB #OGONEK -0xB3 U+0142 #LATIN SMALL LETTER L WITH STROKE -0xB4 U+00B4 #ACUTE ACCENT -0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA -0xB9 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xBA U+015F #LATIN SMALL LETTER S WITH CEDILLA -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+013D #LATIN CAPITAL LETTER L WITH CARON -0xBD U+02DD #DOUBLE ACUTE ACCENT -0xBE U+013E #LATIN SMALL LETTER L WITH CARON -0xBF U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE -0xC0 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE -0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE -0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xC3 U+0102 #LATIN CAPITAL LETTER A WITH BREVE -0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0xC5 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE -0xC6 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0xC7 U+00C7 #LATIN CAPITAL 
LETTER C WITH CEDILLA -0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON -0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE -0xCA U+0118 #LATIN CAPITAL LETTER E WITH OGONEK -0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS -0xCC U+011A #LATIN CAPITAL LETTER E WITH CARON -0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE -0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xCF U+010E #LATIN CAPITAL LETTER D WITH CARON -0xD0 U+0110 #LATIN CAPITAL LETTER D WITH STROKE -0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE -0xD2 U+0147 #LATIN CAPITAL LETTER N WITH CARON -0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE -0xD4 U+00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xD5 U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0xD7 U+00D7 #MULTIPLICATION SIGN -0xD8 U+0158 #LATIN CAPITAL LETTER R WITH CARON -0xD9 U+016E #LATIN CAPITAL LETTER U WITH RING ABOVE -0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE -0xDB U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS -0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE -0xDE U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA -0xDF U+00DF #LATIN SMALL LETTER SHARP S -0xE0 U+0155 #LATIN SMALL LETTER R WITH ACUTE -0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE -0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX -0xE3 U+0103 #LATIN SMALL LETTER A WITH BREVE -0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS -0xE5 U+013A #LATIN SMALL LETTER L WITH ACUTE -0xE6 U+0107 #LATIN SMALL LETTER C WITH ACUTE -0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+010D #LATIN SMALL LETTER C WITH CARON -0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE -0xEA U+0119 #LATIN SMALL LETTER E WITH OGONEK -0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS -0xEC U+011B #LATIN SMALL LETTER E WITH CARON -0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE -0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX -0xEF U+010F #LATIN SMALL LETTER D WITH CARON -0xF0 U+0111 #LATIN SMALL LETTER D WITH 
STROKE -0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE -0xF2 U+0148 #LATIN SMALL LETTER N WITH CARON -0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE -0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0xF5 U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE -0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS -0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0159 #LATIN SMALL LETTER R WITH CARON -0xF9 U+016F #LATIN SMALL LETTER U WITH RING ABOVE -0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE -0xFB U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE -0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS -0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE -0xFE U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xFF U+02D9 #DOT ABOVE - +################## +0x20-0x7e idem +# +0x80 U+20AC #EURO SIGN +0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 #UNDEFINED +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 #UNDEFINED +0x89 U+2030 #PER MILLE SIGN +0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+015A #LATIN CAPITAL LETTER S WITH ACUTE +0x8D U+0164 #LATIN CAPITAL LETTER T WITH CARON +0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON +0x8F U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE +0x90 #UNDEFINED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +0x9A U+0161 #LATIN SMALL LETTER S WITH CARON +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+015B #LATIN SMALL LETTER S WITH ACUTE +0x9D U+0165 #LATIN SMALL LETTER T WITH CARON +0x9E U+017E #LATIN SMALL LETTER Z WITH CARON +0x9F U+017A #LATIN SMALL LETTER Z WITH ACUTE +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+02C7 #CARON +0xA2 U+02D8 #BREVE +0xA3 U+0141 #LATIN CAPITAL LETTER L WITH STROKE 
+0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +0xAA U+015E #LATIN CAPITAL LETTER S WITH CEDILLA +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+02DB #OGONEK +0xB3 U+0142 #LATIN SMALL LETTER L WITH STROKE +0xB4 U+00B4 #ACUTE ACCENT +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+00B8 #CEDILLA +0xB9 U+0105 #LATIN SMALL LETTER A WITH OGONEK +0xBA U+015F #LATIN SMALL LETTER S WITH CEDILLA +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+013D #LATIN CAPITAL LETTER L WITH CARON +0xBD U+02DD #DOUBLE ACUTE ACCENT +0xBE U+013E #LATIN SMALL LETTER L WITH CARON +0xBF U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE +0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 U+0102 #LATIN CAPITAL LETTER A WITH BREVE +0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE +0xC6 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE +0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON +0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0xCA U+0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC U+011A #LATIN CAPITAL LETTER E WITH CARON +0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE +0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF U+010E #LATIN CAPITAL LETTER D WITH CARON +0xD0 U+0110 #LATIN CAPITAL LETTER D WITH STROKE +0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xD2 U+0147 #LATIN CAPITAL LETTER N WITH CARON +0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 U+00D4 
#LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 U+00D7 #MULTIPLICATION SIGN +0xD8 U+0158 #LATIN CAPITAL LETTER R WITH CARON +0xD9 U+016E #LATIN CAPITAL LETTER U WITH RING ABOVE +0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xDB U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xDE U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA +0xDF U+00DF #LATIN SMALL LETTER SHARP S +0xE0 U+0155 #LATIN SMALL LETTER R WITH ACUTE +0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE +0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 U+0103 #LATIN SMALL LETTER A WITH BREVE +0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 U+013A #LATIN SMALL LETTER L WITH ACUTE +0xE6 U+0107 #LATIN SMALL LETTER C WITH ACUTE +0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 U+010D #LATIN SMALL LETTER C WITH CARON +0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA U+0119 #LATIN SMALL LETTER E WITH OGONEK +0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC U+011B #LATIN SMALL LETTER E WITH CARON +0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE +0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF U+010F #LATIN SMALL LETTER D WITH CARON +0xF0 U+0111 #LATIN SMALL LETTER D WITH STROKE +0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE +0xF2 U+0148 #LATIN SMALL LETTER N WITH CARON +0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE +0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xF7 U+00F7 #DIVISION SIGN +0xF8 U+0159 #LATIN SMALL LETTER R WITH CARON +0xF9 U+016F #LATIN SMALL LETTER U WITH RING ABOVE +0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE +0xFB U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE +0xFE 
U+0163 #LATIN SMALL LETTER T WITH CEDILLA +0xFF U+02D9 #DOT ABOVE diff --git a/src/chrtrans/cp1251_uni.tbl b/src/chrtrans/cp1251_uni.tbl index e9bb9460..541f8640 100644 --- a/src/chrtrans/cp1251_uni.tbl +++ b/src/chrtrans/cp1251_uni.tbl @@ -2,153 +2,155 @@ Mwindows-1251 #Name as a Display Charset (used on Options screen) -OWinCyrillic (cp1251) +OCyrillic (windows-1251) # -# Name: cp1251_WinCyrillic to Unicode table +# Name: cp1251 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1251_WinCyrillic code (in hex) +# Column #1 is the cp1251 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1251_WinCyrillic order +# The entries are in cp1251 order # +################## + +0x20-0x7e idem # -0x20-0x7f idem -# -0x80 U+0402 #CYRILLIC CAPITAL LETTER DJE -0x81 U+0403 #CYRILLIC CAPITAL LETTER GJE -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 U+0453 #CYRILLIC SMALL LETTER GJE -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -#0x88 #UNDEFINED -0x89 U+2030 #PER MILLE SIGN -0x8A U+0409 #CYRILLIC CAPITAL LETTER LJE -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+040A #CYRILLIC CAPITAL LETTER NJE -0x8D U+040C #CYRILLIC CAPITAL LETTER KJE -0x8E U+040B #CYRILLIC CAPITAL LETTER TSHE -0x8F U+040F #CYRILLIC CAPITAL LETTER DZHE -0x90 U+0452 #CYRILLIC SMALL LETTER DJE -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -#0x98 #UNDEFINED -0x99 
U+2122 #TRADE MARK SIGN -0x9A U+0459 #CYRILLIC SMALL LETTER LJE -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+045A #CYRILLIC SMALL LETTER NJE -0x9D U+045C #CYRILLIC SMALL LETTER KJE -0x9E U+045B #CYRILLIC SMALL LETTER TSHE -0x9F U+045F #CYRILLIC SMALL LETTER DZHE -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+040E #CYRILLIC CAPITAL LETTER SHORT U -0xA2 U+045E #CYRILLIC SMALL LETTER SHORT U -0xA3 U+0408 #CYRILLIC CAPITAL LETTER JE -0xA4 U+00A4 #CURRENCY SIGN -0xA5 U+0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN -0xA6 U+00A6 #BROKEN BAR -0xA7 U+00A7 #SECTION SIGN -0xA8 U+0401 #CYRILLIC CAPITAL LETTER IO -0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xAC U+00AC #NOT SIGN -0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+0407 #CYRILLIC CAPITAL LETTER YI -0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -0xB3 U+0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -0xB4 U+0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN -0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+0451 #CYRILLIC SMALL LETTER IO -0xB9 U+2116 #NUMERO SIGN -0xBA U+0454 #CYRILLIC SMALL LETTER UKRAINIAN IE -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+0458 #CYRILLIC SMALL LETTER JE -0xBD U+0405 #CYRILLIC CAPITAL LETTER DZE -0xBE U+0455 #CYRILLIC SMALL LETTER DZE -0xBF U+0457 #CYRILLIC SMALL LETTER YI -0xC0 U+0410 #CYRILLIC CAPITAL LETTER A -0xC1 U+0411 #CYRILLIC CAPITAL LETTER BE -0xC2 U+0412 #CYRILLIC CAPITAL LETTER VE -0xC3 U+0413 #CYRILLIC CAPITAL LETTER GHE -0xC4 U+0414 #CYRILLIC CAPITAL LETTER DE -0xC5 U+0415 #CYRILLIC CAPITAL LETTER IE -0xC6 U+0416 #CYRILLIC CAPITAL LETTER ZHE -0xC7 U+0417 #CYRILLIC CAPITAL LETTER ZE -0xC8 U+0418 #CYRILLIC CAPITAL LETTER I -0xC9 U+0419 #CYRILLIC CAPITAL LETTER SHORT I -0xCA U+041A #CYRILLIC CAPITAL LETTER KA -0xCB U+041B #CYRILLIC 
CAPITAL LETTER EL -0xCC U+041C #CYRILLIC CAPITAL LETTER EM -0xCD U+041D #CYRILLIC CAPITAL LETTER EN -0xCE U+041E #CYRILLIC CAPITAL LETTER O -0xCF U+041F #CYRILLIC CAPITAL LETTER PE -0xD0 U+0420 #CYRILLIC CAPITAL LETTER ER -0xD1 U+0421 #CYRILLIC CAPITAL LETTER ES -0xD2 U+0422 #CYRILLIC CAPITAL LETTER TE -0xD3 U+0423 #CYRILLIC CAPITAL LETTER U -0xD4 U+0424 #CYRILLIC CAPITAL LETTER EF -0xD5 U+0425 #CYRILLIC CAPITAL LETTER HA -0xD6 U+0426 #CYRILLIC CAPITAL LETTER TSE -0xD7 U+0427 #CYRILLIC CAPITAL LETTER CHE -0xD8 U+0428 #CYRILLIC CAPITAL LETTER SHA -0xD9 U+0429 #CYRILLIC CAPITAL LETTER SHCHA -0xDA U+042A #CYRILLIC CAPITAL LETTER HARD SIGN -0xDB U+042B #CYRILLIC CAPITAL LETTER YERU -0xDC U+042C #CYRILLIC CAPITAL LETTER SOFT SIGN -0xDD U+042D #CYRILLIC CAPITAL LETTER E -0xDE U+042E #CYRILLIC CAPITAL LETTER YU -0xDF U+042F #CYRILLIC CAPITAL LETTER YA -0xE0 U+0430 #CYRILLIC SMALL LETTER A -0xE1 U+0431 #CYRILLIC SMALL LETTER BE -0xE2 U+0432 #CYRILLIC SMALL LETTER VE -0xE3 U+0433 #CYRILLIC SMALL LETTER GHE -0xE4 U+0434 #CYRILLIC SMALL LETTER DE -0xE5 U+0435 #CYRILLIC SMALL LETTER IE -0xE6 U+0436 #CYRILLIC SMALL LETTER ZHE -0xE7 U+0437 #CYRILLIC SMALL LETTER ZE -0xE8 U+0438 #CYRILLIC SMALL LETTER I -0xE9 U+0439 #CYRILLIC SMALL LETTER SHORT I -0xEA U+043A #CYRILLIC SMALL LETTER KA -0xEB U+043B #CYRILLIC SMALL LETTER EL -0xEC U+043C #CYRILLIC SMALL LETTER EM -0xED U+043D #CYRILLIC SMALL LETTER EN -0xEE U+043E #CYRILLIC SMALL LETTER O -0xEF U+043F #CYRILLIC SMALL LETTER PE -0xF0 U+0440 #CYRILLIC SMALL LETTER ER -0xF1 U+0441 #CYRILLIC SMALL LETTER ES -0xF2 U+0442 #CYRILLIC SMALL LETTER TE -0xF3 U+0443 #CYRILLIC SMALL LETTER U -0xF4 U+0444 #CYRILLIC SMALL LETTER EF -0xF5 U+0445 #CYRILLIC SMALL LETTER HA -0xF6 U+0446 #CYRILLIC SMALL LETTER TSE -0xF7 U+0447 #CYRILLIC SMALL LETTER CHE -0xF8 U+0448 #CYRILLIC SMALL LETTER SHA -0xF9 U+0449 #CYRILLIC SMALL LETTER SHCHA -0xFA U+044A #CYRILLIC SMALL LETTER HARD SIGN -0xFB U+044B #CYRILLIC SMALL LETTER YERU -0xFC U+044C #CYRILLIC SMALL 
LETTER SOFT SIGN -0xFD U+044D #CYRILLIC SMALL LETTER E -0xFE U+044E #CYRILLIC SMALL LETTER YU -0xFF U+044F #CYRILLIC SMALL LETTER YA +0x80 U+0402 #CYRILLIC CAPITAL LETTER DJE +0x81 U+0403 #CYRILLIC CAPITAL LETTER GJE +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0453 #CYRILLIC SMALL LETTER GJE +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+20AC #EURO SIGN +0x89 U+2030 #PER MILLE SIGN +0x8A U+0409 #CYRILLIC CAPITAL LETTER LJE +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+040A #CYRILLIC CAPITAL LETTER NJE +0x8D U+040C #CYRILLIC CAPITAL LETTER KJE +0x8E U+040B #CYRILLIC CAPITAL LETTER TSHE +0x8F U+040F #CYRILLIC CAPITAL LETTER DZHE +0x90 U+0452 #CYRILLIC SMALL LETTER DJE +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +0x9A U+0459 #CYRILLIC SMALL LETTER LJE +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+045A #CYRILLIC SMALL LETTER NJE +0x9D U+045C #CYRILLIC SMALL LETTER KJE +0x9E U+045B #CYRILLIC SMALL LETTER TSHE +0x9F U+045F #CYRILLIC SMALL LETTER DZHE +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+040E #CYRILLIC CAPITAL LETTER SHORT U +0xA2 U+045E #CYRILLIC SMALL LETTER SHORT U +0xA3 U+0408 #CYRILLIC CAPITAL LETTER JE +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+0401 #CYRILLIC CAPITAL LETTER IO +0xA9 U+00A9 #COPYRIGHT SIGN +0xAA U+0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+0407 #CYRILLIC CAPITAL LETTER YI +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+0406 #CYRILLIC CAPITAL 
LETTER BYELORUSSIAN-UKRAINIAN I +0xB3 U+0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xB4 U+0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+0451 #CYRILLIC SMALL LETTER IO +0xB9 U+2116 #NUMERO SIGN +0xBA U+0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+0458 #CYRILLIC SMALL LETTER JE +0xBD U+0405 #CYRILLIC CAPITAL LETTER DZE +0xBE U+0455 #CYRILLIC SMALL LETTER DZE +0xBF U+0457 #CYRILLIC SMALL LETTER YI +0xC0 U+0410 #CYRILLIC CAPITAL LETTER A +0xC1 U+0411 #CYRILLIC CAPITAL LETTER BE +0xC2 U+0412 #CYRILLIC CAPITAL LETTER VE +0xC3 U+0413 #CYRILLIC CAPITAL LETTER GHE +0xC4 U+0414 #CYRILLIC CAPITAL LETTER DE +0xC5 U+0415 #CYRILLIC CAPITAL LETTER IE +0xC6 U+0416 #CYRILLIC CAPITAL LETTER ZHE +0xC7 U+0417 #CYRILLIC CAPITAL LETTER ZE +0xC8 U+0418 #CYRILLIC CAPITAL LETTER I +0xC9 U+0419 #CYRILLIC CAPITAL LETTER SHORT I +0xCA U+041A #CYRILLIC CAPITAL LETTER KA +0xCB U+041B #CYRILLIC CAPITAL LETTER EL +0xCC U+041C #CYRILLIC CAPITAL LETTER EM +0xCD U+041D #CYRILLIC CAPITAL LETTER EN +0xCE U+041E #CYRILLIC CAPITAL LETTER O +0xCF U+041F #CYRILLIC CAPITAL LETTER PE +0xD0 U+0420 #CYRILLIC CAPITAL LETTER ER +0xD1 U+0421 #CYRILLIC CAPITAL LETTER ES +0xD2 U+0422 #CYRILLIC CAPITAL LETTER TE +0xD3 U+0423 #CYRILLIC CAPITAL LETTER U +0xD4 U+0424 #CYRILLIC CAPITAL LETTER EF +0xD5 U+0425 #CYRILLIC CAPITAL LETTER HA +0xD6 U+0426 #CYRILLIC CAPITAL LETTER TSE +0xD7 U+0427 #CYRILLIC CAPITAL LETTER CHE +0xD8 U+0428 #CYRILLIC CAPITAL LETTER SHA +0xD9 U+0429 #CYRILLIC CAPITAL LETTER SHCHA +0xDA U+042A #CYRILLIC CAPITAL LETTER HARD SIGN +0xDB U+042B #CYRILLIC CAPITAL LETTER YERU +0xDC U+042C #CYRILLIC CAPITAL LETTER SOFT SIGN +0xDD U+042D #CYRILLIC CAPITAL LETTER E +0xDE U+042E #CYRILLIC CAPITAL LETTER YU +0xDF U+042F #CYRILLIC CAPITAL LETTER YA +0xE0 U+0430 #CYRILLIC SMALL LETTER A +0xE1 U+0431 #CYRILLIC SMALL LETTER BE +0xE2 U+0432 #CYRILLIC SMALL 
LETTER VE +0xE3 U+0433 #CYRILLIC SMALL LETTER GHE +0xE4 U+0434 #CYRILLIC SMALL LETTER DE +0xE5 U+0435 #CYRILLIC SMALL LETTER IE +0xE6 U+0436 #CYRILLIC SMALL LETTER ZHE +0xE7 U+0437 #CYRILLIC SMALL LETTER ZE +0xE8 U+0438 #CYRILLIC SMALL LETTER I +0xE9 U+0439 #CYRILLIC SMALL LETTER SHORT I +0xEA U+043A #CYRILLIC SMALL LETTER KA +0xEB U+043B #CYRILLIC SMALL LETTER EL +0xEC U+043C #CYRILLIC SMALL LETTER EM +0xED U+043D #CYRILLIC SMALL LETTER EN +0xEE U+043E #CYRILLIC SMALL LETTER O +0xEF U+043F #CYRILLIC SMALL LETTER PE +0xF0 U+0440 #CYRILLIC SMALL LETTER ER +0xF1 U+0441 #CYRILLIC SMALL LETTER ES +0xF2 U+0442 #CYRILLIC SMALL LETTER TE +0xF3 U+0443 #CYRILLIC SMALL LETTER U +0xF4 U+0444 #CYRILLIC SMALL LETTER EF +0xF5 U+0445 #CYRILLIC SMALL LETTER HA +0xF6 U+0446 #CYRILLIC SMALL LETTER TSE +0xF7 U+0447 #CYRILLIC SMALL LETTER CHE +0xF8 U+0448 #CYRILLIC SMALL LETTER SHA +0xF9 U+0449 #CYRILLIC SMALL LETTER SHCHA +0xFA U+044A #CYRILLIC SMALL LETTER HARD SIGN +0xFB U+044B #CYRILLIC SMALL LETTER YERU +0xFC U+044C #CYRILLIC SMALL LETTER SOFT SIGN +0xFD U+044D #CYRILLIC SMALL LETTER E +0xFE U+044E #CYRILLIC SMALL LETTER YU +0xFF U+044F #CYRILLIC SMALL LETTER YA diff --git a/src/chrtrans/cp1252_uni.tbl b/src/chrtrans/cp1252_uni.tbl index d8adb2fa..09ee9b6a 100644 --- a/src/chrtrans/cp1252_uni.tbl +++ b/src/chrtrans/cp1252_uni.tbl @@ -6,280 +6,155 @@ D0 Mwindows-1252 #Name as a Display Charset (used on Options screen) -OWinLatin1 (cp1252) +OWestern (windows-1252) # -# Name: cp1252_WinLatin1 to Unicode table -# Unicode version: 1.1 -# Table version: 1.1 +# Name: cp1252 to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 # Table format: Format A -# Date: 03/31/95 -# Authors: Michel Suignard <michelsu@microsoft.com> -# Lori Hoerth <lorih@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1252_WinLatin1 code (in hex) -# Column #2 is the Unicode (in hex as U+XXXX) +# 
Column #1 is the cp1252 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1252_WinLatin1 order +# The entries are in cp1252 order # -#0x00 U+0000 #Null -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0a U+000a #LINE FEED -#0x0b U+000b #VERTICAL TABULATION -#0x0c U+000c #FORM FEED -#0x0d U+000d #CARRIAGE RETURN -#0x0e U+000e #SHIFT OUT -#0x0f U+000f #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1a U+001a #SUBSTITUTE -#0x1b U+001b #ESCAPE -#0x1c U+001c #FILE SEPARATOR -#0x1d U+001d #GROUP SEPARATOR -#0x1e U+001e #RECORD SEPARATOR -#0x1f U+001f #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND -0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2a U+002a #ASTERISK -0x2b U+002b #PLUS SIGN -0x2c U+002c #COMMA -0x2d U+002d #HYPHEN-MINUS -0x2e U+002e #FULL STOP -0x2f U+002f #SOLIDUS -0x30 U+0030 #DIGIT ZERO -0x31 U+0031 #DIGIT ONE -0x32 U+0032 #DIGIT TWO -0x33 U+0033 #DIGIT THREE -0x34 U+0034 #DIGIT FOUR -0x35 U+0035 #DIGIT FIVE -0x36 U+0036 #DIGIT SIX -0x37 U+0037 #DIGIT SEVEN -0x38 U+0038 #DIGIT EIGHT -0x39 U+0039 #DIGIT NINE -0x3a U+003a #COLON -0x3b U+003b #SEMICOLON -0x3c U+003c #LESS-THAN SIGN -0x3d U+003d #EQUALS SIGN -0x3e U+003e #GREATER-THAN SIGN -0x3f U+003f #QUESTION MARK -0x40 
U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL LETTER A -0x42 U+0042 #LATIN CAPITAL LETTER B -0x43 U+0043 #LATIN CAPITAL LETTER C -0x44 U+0044 #LATIN CAPITAL LETTER D -0x45 U+0045 #LATIN CAPITAL LETTER E -0x46 U+0046 #LATIN CAPITAL LETTER F -0x47 U+0047 #LATIN CAPITAL LETTER G -0x48 U+0048 #LATIN CAPITAL LETTER H -0x49 U+0049 #LATIN CAPITAL LETTER I -0x4a U+004a #LATIN CAPITAL LETTER J -0x4b U+004b #LATIN CAPITAL LETTER K -0x4c U+004c #LATIN CAPITAL LETTER L -0x4d U+004d #LATIN CAPITAL LETTER M -0x4e U+004e #LATIN CAPITAL LETTER N -0x4f U+004f #LATIN CAPITAL LETTER O -0x50 U+0050 #LATIN CAPITAL LETTER P -0x51 U+0051 #LATIN CAPITAL LETTER Q -0x52 U+0052 #LATIN CAPITAL LETTER R -0x53 U+0053 #LATIN CAPITAL LETTER S -0x54 U+0054 #LATIN CAPITAL LETTER T -0x55 U+0055 #LATIN CAPITAL LETTER U -0x56 U+0056 #LATIN CAPITAL LETTER V -0x57 U+0057 #LATIN CAPITAL LETTER W -0x58 U+0058 #LATIN CAPITAL LETTER X -0x59 U+0059 #LATIN CAPITAL LETTER Y -0x5a U+005a #LATIN CAPITAL LETTER Z -0x5b U+005b #LEFT SQUARE BRACKET -0x5c U+005c #REVERSE SOLIDUS -0x5d U+005d #RIGHT SQUARE BRACKET -0x5e U+005e #CIRCUMFLEX ACCENT -0x5f U+005f #LOW LINE -0x60 U+0060 #GRAVE ACCENT -0x61 U+0061 #LATIN SMALL LETTER A -0x62 U+0062 #LATIN SMALL LETTER B -0x63 U+0063 #LATIN SMALL LETTER C -0x64 U+0064 #LATIN SMALL LETTER D -0x65 U+0065 #LATIN SMALL LETTER E -0x66 U+0066 #LATIN SMALL LETTER F -0x67 U+0067 #LATIN SMALL LETTER G -0x68 U+0068 #LATIN SMALL LETTER H -0x69 U+0069 #LATIN SMALL LETTER I -0x6a U+006a #LATIN SMALL LETTER J -0x6b U+006b #LATIN SMALL LETTER K -0x6c U+006c #LATIN SMALL LETTER L -0x6d U+006d #LATIN SMALL LETTER M -0x6e U+006e #LATIN SMALL LETTER N -0x6f U+006f #LATIN SMALL LETTER O -0x70 U+0070 #LATIN SMALL LETTER P -0x71 U+0071 #LATIN SMALL LETTER Q -0x72 U+0072 #LATIN SMALL LETTER R -0x73 U+0073 #LATIN SMALL LETTER S -0x74 U+0074 #LATIN SMALL LETTER T -0x75 U+0075 #LATIN SMALL LETTER U -0x76 U+0076 #LATIN SMALL LETTER V -0x77 U+0077 #LATIN SMALL LETTER W -0x78 U+0078 #LATIN SMALL 
LETTER X -0x79 U+0079 #LATIN SMALL LETTER Y -0x7a U+007a #LATIN SMALL LETTER Z -0x7b U+007b #LEFT CURLY BRACKET -0x7c U+007c #VERTICAL LINE -0x7d U+007d #RIGHT CURLY BRACKET -0x7e U+007e #TILDE -0x7f U+007f #DELETE -#0x80 U+0080 #NOT USED -#0x81 U+0081 #NOT USED -0x82 U+201a #SINGLE LOW-9 QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK -0x84 U+201e #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -0x88 U+02c6 #MODIFIER LETTER CIRCUMFLEX ACCENT -0x89 U+2030 #PER MILLE SIGN -0x8a U+0160 #LATIN CAPITAL LETTER S WITH CARON -0x8b U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8c U+0152 #LATIN CAPITAL LIGATURE OE -#0x8d U+008d #NOT USED -#0x8e U+008e #NOT USED -#0x8f U+008f #NOT USED -#0x90 U+0090 #NOT USED -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201c #LEFT DOUBLE QUOTATION MARK -0x94 U+201d #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -0x98 U+02dc #SMALL TILDE -0x99 U+2122 #TRADE MARK SIGN -0x9a U+0161 #LATIN SMALL LETTER S WITH CARON -0x9b U+203a #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9c U+0153 #LATIN SMALL LIGATURE OE -#0x9d U+009d #NOT USED -#0x9e U+009e #NOT USED -0x9f U+0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS -0xa0 U+00a0 #NO-BREAK SPACE -0xa1 U+00a1 #INVERTED EXCLAMATION MARK -0xa2 U+00a2 #CENT SIGN -0xa3 U+00a3 #POUND SIGN -0xa4 U+00a4 #CURRENCY SIGN -0xa5 U+00a5 #YEN SIGN -0xa6 U+00a6 #BROKEN BAR -0xa7 U+00a7 #SECTION SIGN -0xa8 U+00a8 #DIAERESIS -0xa9 U+00a9 #COPYRIGHT SIGN -0xaa U+00aa #FEMININE ORDINAL INDICATOR -0xab U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xac U+00ac #NOT SIGN -0xad U+00ad #SOFT HYPHEN -0xae U+00ae #REGISTERED SIGN -0xaf U+00af #MACRON -0xb0 U+00b0 #DEGREE SIGN -0xb1 U+00b1 #PLUS-MINUS SIGN -0xb2 U+00b2 #SUPERSCRIPT TWO -0xb3 U+00b3 #SUPERSCRIPT THREE -0xb4 U+00b4 #ACUTE ACCENT -0xb5 U+00b5 #MICRO SIGN -0xb6 U+00b6 #PILCROW SIGN -0xb7 
U+00b7 #MIDDLE DOT -0xb8 U+00b8 #CEDILLA -0xb9 U+00b9 #SUPERSCRIPT ONE -0xba U+00ba #MASCULINE ORDINAL INDICATOR -0xbb U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xbc U+00bc #VULGAR FRACTION ONE QUARTER -0xbd U+00bd #VULGAR FRACTION ONE HALF -0xbe U+00be #VULGAR FRACTION THREE QUARTERS -0xbf U+00bf #INVERTED QUESTION MARK -0xc0 U+00c0 #LATIN CAPITAL LETTER A WITH GRAVE -0xc1 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE -0xc2 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xc3 U+00c3 #LATIN CAPITAL LETTER A WITH TILDE -0xc4 U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0xc5 U+00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE -0xc6 U+00c6 #LATIN CAPITAL LIGATURE AE -0xc7 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA -0xc8 U+00c8 #LATIN CAPITAL LETTER E WITH GRAVE -0xc9 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE -0xca U+00ca #LATIN CAPITAL LETTER E WITH CIRCUMFLEX -0xcb U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS -0xcc U+00cc #LATIN CAPITAL LETTER I WITH GRAVE -0xcd U+00cd #LATIN CAPITAL LETTER I WITH ACUTE -0xce U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xcf U+00cf #LATIN CAPITAL LETTER I WITH DIAERESIS -0xd0 U+00d0 #LATIN CAPITAL LETTER ETH -0xd1 U+00d1 #LATIN CAPITAL LETTER N WITH TILDE -0xd2 U+00d2 #LATIN CAPITAL LETTER O WITH GRAVE -0xd3 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE -0xd4 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xd5 U+00d5 #LATIN CAPITAL LETTER O WITH TILDE -0xd6 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0xd7 U+00d7 #MULTIPLICATION SIGN -0xd8 U+00d8 #LATIN CAPITAL LETTER O WITH STROKE -0xd9 U+00d9 #LATIN CAPITAL LETTER U WITH GRAVE -0xda U+00da #LATIN CAPITAL LETTER U WITH ACUTE -0xdb U+00db #LATIN CAPITAL LETTER U WITH CIRCUMFLEX -0xdc U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS -0xdd U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE -0xde U+00de #LATIN CAPITAL LETTER THORN -0xdf U+00df #LATIN SMALL LETTER SHARP S -0xe0 U+00e0 #LATIN SMALL LETTER A WITH GRAVE -0xe1 U+00e1 #LATIN SMALL LETTER A WITH ACUTE -0xe2 U+00e2 #LATIN SMALL 
LETTER A WITH CIRCUMFLEX -0xe3 U+00e3 #LATIN SMALL LETTER A WITH TILDE -0xe4 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS -0xe5 U+00e5 #LATIN SMALL LETTER A WITH RING ABOVE -0xe6 U+00e6 #LATIN SMALL LIGATURE AE -0xe7 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA -0xe8 U+00e8 #LATIN SMALL LETTER E WITH GRAVE -0xe9 U+00e9 #LATIN SMALL LETTER E WITH ACUTE -0xea U+00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX -0xeb U+00eb #LATIN SMALL LETTER E WITH DIAERESIS -0xec U+00ec #LATIN SMALL LETTER I WITH GRAVE -0xed U+00ed #LATIN SMALL LETTER I WITH ACUTE -0xee U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX -0xef U+00ef #LATIN SMALL LETTER I WITH DIAERESIS -0xf0 U+00f0 #LATIN SMALL LETTER ETH -0xf1 U+00f1 #LATIN SMALL LETTER N WITH TILDE -0xf2 U+00f2 #LATIN SMALL LETTER O WITH GRAVE -0xf3 U+00f3 #LATIN SMALL LETTER O WITH ACUTE -0xf4 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0xf5 U+00f5 #LATIN SMALL LETTER O WITH TILDE -0xf6 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS -0xf7 U+00f7 #DIVISION SIGN -0xf8 U+00f8 #LATIN SMALL LETTER O WITH STROKE -0xf9 U+00f9 #LATIN SMALL LETTER U WITH GRAVE -0xfa U+00fa #LATIN SMALL LETTER U WITH ACUTE -0xfb U+00fb #LATIN SMALL LETTER U WITH CIRCUMFLEX -0xfc U+00fc #LATIN SMALL LETTER U WITH DIAERESIS -0xfd U+00fd #LATIN SMALL LETTER Y WITH ACUTE -0xfe U+00fe #LATIN SMALL LETTER THORN -0xff U+00ff #LATIN SMALL LETTER Y WITH DIAERESIS - +################## +0x20-0x7e idem +# +0x80 U+20AC #EURO SIGN +0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 U+2030 #PER MILLE SIGN +0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+0152 #LATIN CAPITAL LIGATURE OE +0x8D #UNDEFINED +0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON +0x8F #UNDEFINED +0x90 #UNDEFINED +0x91 U+2018 
#LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 U+02DC #SMALL TILDE +0x99 U+2122 #TRADE MARK SIGN +0x9A U+0161 #LATIN SMALL LETTER S WITH CARON +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+0153 #LATIN SMALL LIGATURE OE +0x9D #UNDEFINED +0x9E U+017E #LATIN SMALL LETTER Z WITH CARON +0x9F U+0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+00A1 #INVERTED EXCLAMATION MARK +0xA2 U+00A2 #CENT SIGN +0xA3 U+00A3 #POUND SIGN +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +0xAA U+00AA #FEMININE ORDINAL INDICATOR +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+00AF #MACRON +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+00B4 #ACUTE ACCENT +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+00B8 #CEDILLA +0xB9 U+00B9 #SUPERSCRIPT ONE +0xBA U+00BA #MASCULINE ORDINAL INDICATOR +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+00BC #VULGAR FRACTION ONE QUARTER +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+00BE #VULGAR FRACTION THREE QUARTERS +0xBF U+00BF #INVERTED QUESTION MARK +0xC0 U+00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 U+00C3 #LATIN CAPITAL LETTER A WITH TILDE +0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 U+00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 U+00C6 #LATIN CAPITAL LETTER AE +0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 U+00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0xC9 U+00C9 #LATIN 
CAPITAL LETTER E WITH ACUTE +0xCA U+00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC U+00CC #LATIN CAPITAL LETTER I WITH GRAVE +0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE +0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF U+00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 U+00D0 #LATIN CAPITAL LETTER ETH +0xD1 U+00D1 #LATIN CAPITAL LETTER N WITH TILDE +0xD2 U+00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 U+00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 U+00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 U+00D7 #MULTIPLICATION SIGN +0xD8 U+00D8 #LATIN CAPITAL LETTER O WITH STROKE +0xD9 U+00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xDB U+00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xDE U+00DE #LATIN CAPITAL LETTER THORN +0xDF U+00DF #LATIN SMALL LETTER SHARP S +0xE0 U+00E0 #LATIN SMALL LETTER A WITH GRAVE +0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE +0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 U+00E3 #LATIN SMALL LETTER A WITH TILDE +0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 U+00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0xE6 U+00E6 #LATIN SMALL LETTER AE +0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 U+00E8 #LATIN SMALL LETTER E WITH GRAVE +0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA U+00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC U+00EC #LATIN SMALL LETTER I WITH GRAVE +0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE +0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF U+00EF #LATIN SMALL LETTER I WITH DIAERESIS +0xF0 U+00F0 #LATIN SMALL LETTER ETH +0xF1 U+00F1 #LATIN SMALL LETTER N WITH TILDE +0xF2 U+00F2 #LATIN SMALL LETTER O WITH GRAVE +0xF3 U+00F3 #LATIN SMALL 
LETTER O WITH ACUTE +0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 U+00F5 #LATIN SMALL LETTER O WITH TILDE +0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xF7 U+00F7 #DIVISION SIGN +0xF8 U+00F8 #LATIN SMALL LETTER O WITH STROKE +0xF9 U+00F9 #LATIN SMALL LETTER U WITH GRAVE +0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE +0xFB U+00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE +0xFE U+00FE #LATIN SMALL LETTER THORN +0xFF U+00FF #LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/src/chrtrans/cp1253_uni.tbl b/src/chrtrans/cp1253_uni.tbl index 097dae2b..b53e44a2 100644 --- a/src/chrtrans/cp1253_uni.tbl +++ b/src/chrtrans/cp1253_uni.tbl @@ -2,45 +2,47 @@ Mwindows-1253 #Name as a Display Charset (used on Options screen) -OWinGreek (cp1253) +OGreek (windows-1253) -# Name: cp1253_WinGreek to Unicode table +# +# Name: cp1253 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. 
Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1253_WinGreek code (in hex) +# Column #1 is the cp1253 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1253_WinGreek order +# The entries are in cp1253 order # ################## -0x20-0x7f idem +0x20-0x7e idem # -#0x80 #UNDEFINED -#0x81 #UNDEFINED +0x80 U+20AC #EURO SIGN +0x81 #UNDEFINED 0x82 U+201A #SINGLE LOW-9 QUOTATION MARK 0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK 0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -#0x88 #UNDEFINED +0x88 #UNDEFINED 0x89 U+2030 #PER MILLE SIGN -#0x8A #UNDEFINED +0x8A #UNDEFINED 0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -#0x8C #UNDEFINED -#0x8D #UNDEFINED -#0x8E #UNDEFINED -#0x8F #UNDEFINED -#0x90 #UNDEFINED +0x8C #UNDEFINED +0x8D #UNDEFINED +0x8E #UNDEFINED +0x8F #UNDEFINED +0x90 #UNDEFINED 0x91 U+2018 #LEFT SINGLE QUOTATION MARK 0x92 U+2019 #RIGHT SINGLE QUOTATION MARK 0x93 U+201C #LEFT DOUBLE QUOTATION MARK @@ -48,14 +50,14 @@ OWinGreek (cp1253) 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -#0x98 #UNDEFINED +0x98 #UNDEFINED 0x99 U+2122 #TRADE MARK SIGN -#0x9A #UNDEFINED +0x9A #UNDEFINED 0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -#0x9C #UNDEFINED -#0x9D #UNDEFINED -#0x9E #UNDEFINED -#0x9F #UNDEFINED +0x9C #UNDEFINED +0x9D #UNDEFINED +0x9E #UNDEFINED +0x9F #UNDEFINED 0xA0 U+00A0 #NO-BREAK SPACE 0xA1 U+0385 #GREEK DIALYTIKA TONOS 0xA2 U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS @@ -66,7 +68,7 @@ OWinGreek (cp1253) 0xA7 U+00A7 #SECTION SIGN 0xA8 U+00A8 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -#0xAA #UNDEFINED +0xAA #UNDEFINED 0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN @@ -106,7 +108,7 @@ 
OWinGreek (cp1253) 0xCF U+039F #GREEK CAPITAL LETTER OMICRON 0xD0 U+03A0 #GREEK CAPITAL LETTER PI 0xD1 U+03A1 #GREEK CAPITAL LETTER RHO -#0xD2 #UNDEFINED +0xD2 #UNDEFINED 0xD3 U+03A3 #GREEK CAPITAL LETTER SIGMA 0xD4 U+03A4 #GREEK CAPITAL LETTER TAU 0xD5 U+03A5 #GREEK CAPITAL LETTER UPSILON @@ -151,4 +153,4 @@ OWinGreek (cp1253) 0xFC U+03CC #GREEK SMALL LETTER OMICRON WITH TONOS 0xFD U+03CD #GREEK SMALL LETTER UPSILON WITH TONOS 0xFE U+03CE #GREEK SMALL LETTER OMEGA WITH TONOS -#0xFF #UNDEFINED +0xFF #UNDEFINED diff --git a/src/chrtrans/cp1255_uni.tbl b/src/chrtrans/cp1255_uni.tbl index 5f4d761a..6c4ca7d0 100644 --- a/src/chrtrans/cp1255_uni.tbl +++ b/src/chrtrans/cp1255_uni.tbl @@ -2,92 +2,94 @@ Mwindows-1255 #Name as a Display Charset (used on Options screen). -OWinHebrew (cp1255) +OHebrew (windows-1255) -# Name: cp1255_WinHebrew to Unicode table +# +# Name: cp1255 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. 
Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1255_WinHebrew code (in hex) +# Column #1 is the cp1255 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1255_WinHebrew order +# The entries are in cp1255 order # ################## -0x20-0x7f idem +0x20-0x7e idem # -#0x80 #UNDEFINED -#0x81 #UNDEFINED -0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER SCRIPT F -0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x80 U+20AC #EURO SIGN +0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT 0x89 U+2030 #PER MILLE SIGN -#0x8A #UNDEFINED -0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET -#0x8C #UNDEFINED -#0x8D #UNDEFINED -#0x8E #UNDEFINED -#0x8F #UNDEFINED -#0x90 #UNDEFINED -0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK -0x92 U+2019 #SINGLE COMMA QUOTATION MARK -0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK -0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x8A #UNDEFINED +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C #UNDEFINED +0x8D #UNDEFINED +0x8E #UNDEFINED +0x8F #UNDEFINED +0x90 #UNDEFINED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -0x98 U+02DC #SPACING TILDE -0x99 U+2122 #TRADEMARK -#0x9A #UNDEFINED -0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET -#0x9C #UNDEFINED -#0x9D #UNDEFINED -#0x9E #UNDEFINED -#0x9F #UNDEFINED -0xA0 U+00A0 #NON-BREAKING SPACE -#0xA1 #UNDEFINED +0x98 U+02DC 
#SMALL TILDE +0x99 U+2122 #TRADE MARK SIGN +0x9A #UNDEFINED +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C #UNDEFINED +0x9D #UNDEFINED +0x9E #UNDEFINED +0x9F #UNDEFINED +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+00A1 #INVERTED EXCLAMATION MARK 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+20AA #NEW SHEQEL SIGN 0xA5 U+00A5 #YEN SIGN -0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #SPACING DIAERESIS +0xA8 U+00A8 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -#0xAA #UNDEFINED -0xAB U+00AB #LEFT POINTING GUILLEMET +0xAA U+00D7 #MULTIPLICATION SIGN +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED TRADE MARK SIGN -0xAF U+00AF #SPACING MACRON +0xAE U+00AE #REGISTERED SIGN +0xAF U+00AF #MACRON 0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-OR-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO -0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE -0xB4 U+00B4 #SPACING ACUTE +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+00B4 #ACUTE ACCENT 0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PARAGRAPH SIGN +0xB6 U+00B6 #PILCROW SIGN 0xB7 U+00B7 #MIDDLE DOT -#0xB8 #UNDEFINED -0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE -#0xBA #UNDEFINED -0xBB U+00BB #RIGHT POINTING GUILLEMET -0xBC U+00BC #FRACTION ONE QUARTER -0xBD U+00BD #FRACTION ONE HALF -0xBE U+00BE #FRACTION THREE QUARTERS -#0xBF #UNDEFINED +0xB8 U+00B8 #CEDILLA +0xB9 U+00B9 #SUPERSCRIPT ONE +0xBA U+00F7 #DIVISION SIGN +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+00BC #VULGAR FRACTION ONE QUARTER +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+00BE #VULGAR FRACTION THREE QUARTERS +0xBF U+00BF #INVERTED QUESTION MARK 0xC0 U+05B0 #HEBREW POINT SHEVA 0xC1 U+05B1 #HEBREW POINT HATAF SEGOL 0xC2 U+05B2 #HEBREW POINT HATAF PATAH @@ -98,28 +100,28 @@ OWinHebrew (cp1255) 0xC7 U+05B7 #HEBREW POINT PATAH 0xC8 U+05B8 #HEBREW POINT QAMATS 0xC9 U+05B9 #HEBREW POINT HOLAM 
-0xCA U+05BA #HEBREW POINT +0xCA #UNDEFINED 0xCB U+05BB #HEBREW POINT QUBUTS -0xCC U+05BC #HEBREW POINT DAGESH +0xCC U+05BC #HEBREW POINT DAGESH OR MAPIQ 0xCD U+05BD #HEBREW POINT METEG 0xCE U+05BE #HEBREW PUNCTUATION MAQAF 0xCF U+05BF #HEBREW POINT RAFE -0xD0 U+05C0 #HEBREW POINT PASEQ +0xD0 U+05C0 #HEBREW PUNCTUATION PASEQ 0xD1 U+05C1 #HEBREW POINT SHIN DOT 0xD2 U+05C2 #HEBREW POINT SIN DOT 0xD3 U+05C3 #HEBREW PUNCTUATION SOF PASUQ -0xD4 U+05F0 #HEBREW LETTER DOUBLE VAV -0xD5 U+05F1 #HEBREW LETTER VAV YOD -0xD6 U+05F2 #HEBREW LETTER DOUBLE YOD -#0xD7 #UNDEFINED -#0xD8 #UNDEFINED -#0xD9 #UNDEFINED -#0xDA #UNDEFINED -#0xDB #UNDEFINED -#0xDC #UNDEFINED -#0xDD #UNDEFINED -#0xDE #UNDEFINED -#0xDF #UNDEFINED +0xD4 U+05F0 #HEBREW LIGATURE YIDDISH DOUBLE VAV +0xD5 U+05F1 #HEBREW LIGATURE YIDDISH VAV YOD +0xD6 U+05F2 #HEBREW LIGATURE YIDDISH DOUBLE YOD +0xD7 U+05F3 #HEBREW PUNCTUATION GERESH +0xD8 U+05F4 #HEBREW PUNCTUATION GERSHAYIM +0xD9 #UNDEFINED +0xDA #UNDEFINED +0xDB #UNDEFINED +0xDC #UNDEFINED +0xDD #UNDEFINED +0xDE #UNDEFINED +0xDF #UNDEFINED 0xE0 U+05D0 #HEBREW LETTER ALEF 0xE1 U+05D1 #HEBREW LETTER BET 0xE2 U+05D2 #HEBREW LETTER GIMEL @@ -147,8 +149,8 @@ OWinHebrew (cp1255) 0xF8 U+05E8 #HEBREW LETTER RESH 0xF9 U+05E9 #HEBREW LETTER SHIN 0xFA U+05EA #HEBREW LETTER TAV -#0xFB #UNDEFINED -#0xFC #UNDEFINED +0xFB #UNDEFINED +0xFC #UNDEFINED 0xFD U+200E #LEFT-TO-RIGHT MARK 0xFE U+200F #RIGHT-TO-LEFT MARK -#0xFF #UNDEFINED +0xFF #UNDEFINED diff --git a/src/chrtrans/cp1256_uni.tbl b/src/chrtrans/cp1256_uni.tbl index 6d85d221..23af4614 100644 --- a/src/chrtrans/cp1256_uni.tbl +++ b/src/chrtrans/cp1256_uni.tbl @@ -2,110 +2,112 @@ Mwindows-1256 #Name as a Display Charset (used on Options screen). -OWinArabic (cp1256) +OArabic (windows-1256) -# Name: cp1256_WinArabic to Unicode table +# +# Name: cp1256 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. 
Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1256_WinArabic code (in hex) +# Column #1 is the cp1256 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1256_WinArabic order +# The entries are in cp1256 order # ################## -0x20-0x7f idem +0x20-0x7e idem # -#0x80 #UNDEFINED -0x81 U+067E #ARABIC TAA WITH THREE DOTS BELOW -0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER SCRIPT F -0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x80 U+20AC #EURO SIGN +0x81 U+067E #ARABIC LETTER PEH +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT 0x89 U+2030 #PER MILLE SIGN -#0x8A #UNDEFINED -0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET -0x8C U+0152 #LATIN CAPITAL LETTER O E -0x8D U+0686 #ARABIC HAA WITH MIDDLE THREE DOTS DOWNWARD -0x8E U+0698 #ARABIC RA WITH THREE DOTS ABOVE -#0x8F #UNDEFINED -0x90 U+06AF #ARABIC GAF -0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK -0x92 U+2019 #SINGLE COMMA QUOTATION MARK -0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK -0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x8A #UNDEFINED +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+0152 #LATIN CAPITAL LIGATURE OE +0x8D U+0686 #ARABIC LETTER TCHEH +0x8E U+0698 #ARABIC LETTER JEH +0x8F #UNDEFINED +0x90 U+06AF #ARABIC LETTER GAF +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -#0x98 #UNDEFINED -0x99 U+2122 #TRADEMARK 
-#0x9A #UNDEFINED -0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET -0x9C U+0153 #LATIN SMALL LETTER O E +0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +0x9A #UNDEFINED +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+0153 #LATIN SMALL LIGATURE OE 0x9D U+200C #ZERO WIDTH NON-JOINER 0x9E U+200D #ZERO WIDTH JOINER -#0x9F #UNDEFINED -0xA0 U+00A0 #NON-BREAKING SPACE +0x9F #UNDEFINED +0xA0 U+00A0 #NO-BREAK SPACE 0xA1 U+060C #ARABIC COMMA 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+00A4 #CURRENCY SIGN 0xA5 U+00A5 #YEN SIGN -0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #SPACING DIAERESIS +0xA8 U+00A8 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -#0xAA #UNDEFINED -0xAB U+00AB #LEFT POINTING GUILLEMET +0xAA #UNDEFINED +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED TRADE MARK SIGN -0xAF U+00AF #SPACING MACRON +0xAE U+00AE #REGISTERED SIGN +0xAF U+00AF #MACRON 0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-OR-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO -0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE -0xB4 U+00B4 #SPACING ACUTE +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+00B4 #ACUTE ACCENT 0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PARAGRAPH SIGN +0xB6 U+00B6 #PILCROW SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #SPACING CEDILLA -0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE +0xB8 U+00B8 #CEDILLA +0xB9 U+00B9 #SUPERSCRIPT ONE 0xBA U+061B #ARABIC SEMICOLON -0xBB U+00BB #RIGHT POINTING GUILLEMET -0xBC U+00BC #FRACTION ONE QUARTER -0xBD U+00BD #FRACTION ONE HALF -0xBE U+00BE #FRACTION THREE QUARTERS +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+00BC #VULGAR FRACTION ONE QUARTER +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+00BE #VULGAR FRACTION THREE QUARTERS 0xBF U+061F #ARABIC QUESTION MARK -#0xC0 #UNDEFINED -0xC1 U+0621 #ARABIC LETTER HAMZAH -0xC2 U+0622 #ARABIC LETTER MADDAH ON ALEF 
-0xC3 U+0623 #ARABIC LETTER HAMZAH ON ALEF -0xC4 U+0624 #ARABIC LETTER HAMZAH ON WAW -0xC5 U+0625 #ARABIC LETTER HAMZAH UNDER ALEF -0xC6 U+0626 #ARABIC LETTER HAMZAH ON YA +0xC0 #UNDEFINED +0xC1 U+0621 #ARABIC LETTER HAMZA +0xC2 U+0622 #ARABIC LETTER ALEF WITH MADDA ABOVE +0xC3 U+0623 #ARABIC LETTER ALEF WITH HAMZA ABOVE +0xC4 U+0624 #ARABIC LETTER WAW WITH HAMZA ABOVE +0xC5 U+0625 #ARABIC LETTER ALEF WITH HAMZA BELOW +0xC6 U+0626 #ARABIC LETTER YEH WITH HAMZA ABOVE 0xC7 U+0627 #ARABIC LETTER ALEF -0xC8 U+0628 #ARABIC LETTER BAA -0xC9 U+0629 #ARABIC LETTER TAA MARBUTAH -0xCA U+062A #ARABIC LETTER TAA -0xCB U+062B #ARABIC LETTER THAA +0xC8 U+0628 #ARABIC LETTER BEH +0xC9 U+0629 #ARABIC LETTER TEH MARBUTA +0xCA U+062A #ARABIC LETTER TEH +0xCB U+062B #ARABIC LETTER THEH 0xCC U+062C #ARABIC LETTER JEEM -0xCD U+062D #ARABIC LETTER HAA -0xCE U+062E #ARABIC LETTER KHAA +0xCD U+062D #ARABIC LETTER HAH +0xCE U+062E #ARABIC LETTER KHAH 0xCF U+062F #ARABIC LETTER DAL 0xD0 U+0630 #ARABIC LETTER THAL -0xD1 U+0631 #ARABIC LETTER RA +0xD1 U+0631 #ARABIC LETTER REH 0xD2 U+0632 #ARABIC LETTER ZAIN 0xD3 U+0633 #ARABIC LETTER SEEN 0xD4 U+0634 #ARABIC LETTER SHEEN @@ -113,42 +115,42 @@ OWinArabic (cp1256) 0xD6 U+0636 #ARABIC LETTER DAD 0xD7 U+00D7 #MULTIPLICATION SIGN 0xD8 U+0637 #ARABIC LETTER TAH -0xD9 U+0638 #ARABIC LETTER DHAH +0xD9 U+0638 #ARABIC LETTER ZAH 0xDA U+0639 #ARABIC LETTER AIN 0xDB U+063A #ARABIC LETTER GHAIN 0xDC U+0640 #ARABIC TATWEEL -0xDD U+0641 #ARABIC LETTER FA +0xDD U+0641 #ARABIC LETTER FEH 0xDE U+0642 #ARABIC LETTER QAF -0xDF U+0643 #ARABIC LETTER CAF -0xE0 U+00E0 #LATIN SMALL LETTER A GRAVE +0xDF U+0643 #ARABIC LETTER KAF +0xE0 U+00E0 #LATIN SMALL LETTER A WITH GRAVE 0xE1 U+0644 #ARABIC LETTER LAM -0xE2 U+00E2 #LATIN SMALL LETTER A CIRCUMFLEX +0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX 0xE3 U+0645 #ARABIC LETTER MEEM 0xE4 U+0646 #ARABIC LETTER NOON -0xE5 U+0647 #ARABIC LETTER HA +0xE5 U+0647 #ARABIC LETTER HEH 0xE6 U+0648 #ARABIC LETTER WAW -0xE7 
U+00E7 #LATIN SMALL LETTER C CEDILLA -0xE8 U+00E8 #LATIN SMALL LETTER E GRAVE -0xE9 U+00E9 #LATIN SMALL LETTER E ACUTE -0xEA U+00EA #LATIN SMALL LETTER E CIRCUMFLEX -0xEB U+00EB #LATIN SMALL LETTER E DIAERESIS -0xEC U+0649 #ARABIC LETTER ALEF MAQSURAH -0xED U+064A #ARABIC LETTER YA -0xEE U+00EE #LATIN SMALL LETTER I CIRCUMFLEX -0xEF U+00EF #LATIN SMALL LETTER I DIAERESIS +0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 U+00E8 #LATIN SMALL LETTER E WITH GRAVE +0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA U+00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC U+0649 #ARABIC LETTER ALEF MAKSURA +0xED U+064A #ARABIC LETTER YEH +0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF U+00EF #LATIN SMALL LETTER I WITH DIAERESIS 0xF0 U+064B #ARABIC FATHATAN 0xF1 U+064C #ARABIC DAMMATAN 0xF2 U+064D #ARABIC KASRATAN -0xF3 U+064E #ARABIC FATHAH -0xF4 U+00F4 #LATIN SMALL LETTER O CIRCUMFLEX -0xF5 U+064F #ARABIC DAMMAH -0xF6 U+0650 #ARABIC KASRAH +0xF3 U+064E #ARABIC FATHA +0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 U+064F #ARABIC DAMMA +0xF6 U+0650 #ARABIC KASRA 0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0651 #ARABIC SHADDAH -0xF9 U+00F9 #LATIN SMALL LETTER U GRAVE +0xF8 U+0651 #ARABIC SHADDA +0xF9 U+00F9 #LATIN SMALL LETTER U WITH GRAVE 0xFA U+0652 #ARABIC SUKUN -0xFB U+00FB #LATIN SMALL LETTER U CIRCUMFLEX -0xFC U+00FC #LATIN SMALL LETTER U DIAERESIS +0xFB U+00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS 0xFD U+200E #LEFT-TO-RIGHT MARK 0xFE U+200F #RIGHT-TO-LEFT MARK -#0xFF #UNDEFINED +0xFF #UNDEFINED diff --git a/src/chrtrans/cp1257_uni.tbl b/src/chrtrans/cp1257_uni.tbl index 794df717..4c1e70e6 100644 --- a/src/chrtrans/cp1257_uni.tbl +++ b/src/chrtrans/cp1257_uni.tbl @@ -2,169 +2,47 @@ Mwindows-1257 #Name as a Display Charset (used on Options screen) -OWinBaltRim (cp1257) +OBaltic Rim (windows-1257) -# Name: cp1257_WinBaltic to Unicode table +# +# Name: cp1257 to 
Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. Chang <a-kchang@microsoft.com> +# Date: 04/15/98 +# +# Contact: cpxlate@microsoft.com +# # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1257_WinBaltic code (in hex) +# Column #1 is the cp1257 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1257_WinBaltic order +# The entries are in cp1257 order # -#0x00 U+0000 #NULL -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0A U+000A #LINE FEED -#0x0B U+000B #VERTICAL TABULATION -#0x0C U+000C #FORM FEED -#0x0D U+000D #CARRIAGE RETURN -#0x0E U+000E #SHIFT OUT -#0x0F U+000F #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1A U+001A #SUBSTITUTE -#0x1B U+001B #ESCAPE -#0x1C U+001C #FILE SEPARATOR -#0x1D U+001D #GROUP SEPARATOR -#0x1E U+001E #RECORD SEPARATOR -#0x1F U+001F #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND -0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2A U+002A #ASTERISK -0x2B U+002B #PLUS SIGN -0x2C U+002C #COMMA -0x2D U+002D #HYPHEN-MINUS -0x2E U+002E #FULL STOP -0x2F U+002F #SOLIDUS -0x30 U+0030 #DIGIT 0 -0x31 U+0031 #DIGIT 1 -0x32 
U+0032 #DIGIT 2 -0x33 U+0033 #DIGIT 3 -0x34 U+0034 #DIGIT 4 -0x35 U+0035 #DIGIT 5 -0x36 U+0036 #DIGIT 6 -0x37 U+0037 #DIGIT 7 -0x38 U+0038 #DIGIT 8 -0x39 U+0039 #DIGIT 9 -0x3A U+003A #COLON -0x3B U+003B #SEMICOLON -0x3C U+003C #LESS-THAN SIGN -0x3D U+003D #EQUALS SIGN -0x3E U+003E #GREATER-THAN SIGN -0x3F U+003F #QUESTION MARK -0x40 U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL A -0x42 U+0042 #LATIN CAPITAL B -0x43 U+0043 #LATIN CAPITAL C -0x44 U+0044 #LATIN CAPITAL D -0x45 U+0045 #LATIN CAPITAL E -0x46 U+0046 #LATIN CAPITAL F -0x47 U+0047 #LATIN CAPITAL G -0x48 U+0048 #LATIN CAPITAL H -0x49 U+0049 #LATIN CAPITAL I -0x4A U+004A #LATIN CAPITAL J -0x4B U+004B #LATIN CAPITAL K -0x4C U+004C #LATIN CAPITAL L -0x4D U+004D #LATIN CAPITAL M -0x4E U+004E #LATIN CAPITAL N -0x4F U+004F #LATIN CAPITAL O -0x50 U+0050 #LATIN CAPITAL P -0x51 U+0051 #LATIN CAPITAL Q -0x52 U+0052 #LATIN CAPITAL R -0x53 U+0053 #LATIN CAPITAL S -0x54 U+0054 #LATIN CAPITAL T -0x55 U+0055 #LATIN CAPITAL U -0x56 U+0056 #LATIN CAPITAL V -0x57 U+0057 #LATIN CAPITAL W -0x58 U+0058 #LATIN CAPITAL X -0x59 U+0059 #LATIN CAPITAL Y -0x5A U+005A #LATIN CAPITAL Z -0x5B U+005B #LEFT SQUARE BRACKET -0x5C U+005C #BACKSLASH -0x5D U+005D #RIGHT SQUARE BRACKET -0x5E U+005E #CIRCUMFLEX -0x5F U+005F #LOW LINE -0x60 U+0060 #GRAVE -0x61 U+0061 #LATIN SMALL A -0x62 U+0062 #LATIN SMALL B -0x63 U+0063 #LATIN SMALL C -0x64 U+0064 #LATIN SMALL D -0x65 U+0065 #LATIN SMALL E -0x66 U+0066 #LATIN SMALL F -0x67 U+0067 #LATIN SMALL G -0x68 U+0068 #LATIN SMALL H -0x69 U+0069 #LATIN SMALL I -0x6A U+006A #LATIN SMALL J -0x6B U+006B #LATIN SMALL K -0x6C U+006C #LATIN SMALL L -0x6D U+006D #LATIN SMALL M -0x6E U+006E #LATIN SMALL N -0x6F U+006F #LATIN SMALL O -0x70 U+0070 #LATIN SMALL P -0x71 U+0071 #LATIN SMALL Q -0x72 U+0072 #LATIN SMALL R -0x73 U+0073 #LATIN SMALL S -0x74 U+0074 #LATIN SMALL T -0x75 U+0075 #LATIN SMALL U -0x76 U+0076 #LATIN SMALL V -0x77 U+0077 #LATIN SMALL W -0x78 U+0078 #LATIN SMALL X -0x79 U+0079 #LATIN SMALL Y 
-0x7A U+007A #LATIN SMALL Z -0x7B U+007B #LEFT CURLY BRACKET -0x7C U+007C #VERTICAL LINE -0x7D U+007D #RIGHT CURLY BRACKET -0x7E U+007E #TILDE -#0x7F U+007F #DELETE -#0x80 #UNDEFINED -#0x81 #UNDEFINED -0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK -#0x83 #UNDEFINED -0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +################## + +0x20-0x7e idem +# +0x80 U+20AC #EURO SIGN +0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 #UNDEFINED +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -#0x88 #UNDEFINED +0x88 #UNDEFINED 0x89 U+2030 #PER MILLE SIGN -#0x8A #UNDEFINED -0x8B U+2039 #LEFT POINTING SINGLE GUILLEMENT -#0x8C #UNDEFINED +0x8A #UNDEFINED +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C #UNDEFINED 0x8D U+00A8 #DIAERESIS -0x8E U+02C7 #HACEK +0x8E U+02C7 #CARON 0x8F U+00B8 #CEDILLA -#0x90 #UNDEFINED +0x90 #UNDEFINED 0x91 U+2018 #LEFT SINGLE QUOTATION MARK 0x92 U+2019 #RIGHT SINGLE QUOTATION MARK 0x93 U+201C #LEFT DOUBLE QUOTATION MARK @@ -172,107 +50,108 @@ OWinBaltRim (cp1257) 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -#0x98 #UNDEFINED +0x98 #UNDEFINED 0x99 U+2122 #TRADE MARK SIGN -#0x9A #UNDEFINED -0x9B U+203A #RIGHT POINTING SINGLE GUILLEMENT -#0x9C #UNDEFINED +0x9A #UNDEFINED +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C #UNDEFINED 0x9D U+00AF #MACRON 0x9E U+02DB #OGONEK -#0x9F #UNDEFINED +0x9F #UNDEFINED 0xA0 U+00A0 #NO-BREAK SPACE -#0xA1 #UNDEFINED +0xA1 #UNDEFINED 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+00A4 #CURRENCY SIGN -#0xA5 #UNDEFINED +0xA5 #UNDEFINED 0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00D8 #LATIN CAPITAL O STROKE +0xA8 U+00D8 #LATIN CAPITAL LETTER O WITH STROKE 0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+0156 #LATIN CAPITAL R CEDILLA -0xAB U+00AB #LEFT POINTING GUILLEMENT +0xAA U+0156 #LATIN CAPITAL LETTER R WITH CEDILLA +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC 
#NOT SIGN 0xAD U+00AD #SOFT HYPHEN 0xAE U+00AE #REGISTERED SIGN -0xAF U+00C6 #LATIN CAPITAL AE LIGATURE +0xAF U+00C6 #LATIN CAPITAL LETTER AE 0xB0 U+00B0 #DEGREE SIGN 0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT 2 -0xB3 U+00B3 #SUPERSCRIPT 3 -0xB4 U+00B4 #ACUTE +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+00B4 #ACUTE ACCENT 0xB5 U+00B5 #MICRO SIGN 0xB6 U+00B6 #PILCROW SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00F8 #LATIN SAMLL O STROKE +0xB8 U+00F8 #LATIN SMALL LETTER O WITH STROKE 0xB9 U+00B9 #SUPERSCRIPT ONE -0xBA U+0157 #LATIN SMALL R CEDILLA -0xBB U+00BB #RIGHT POINTING GUILLEMENT -0xBC U+00BC #FRACTION 1/4 -0xBD U+00BD #FRACTION 1/2 -0xBE U+00BE #FRACTION 3/4 -0xBF U+00E6 #LATIN SMALL AE LIGATURE -0xC0 U+0104 #LATIN CAPITAL A OGONEK -0xC1 U+012E #LATIN CAPITAL I OGONEK -0xC2 U+0100 #LATIN CAPITAL A MACRON -0xC3 U+0106 #LATIN CAPITAL C ACUTE -0xC4 U+00C4 #LATIN CAPITAL A DIAERESIS -0xC5 U+00C5 #LATIN CAPITAL A RING ABOVE -0xC6 U+0118 #LATIN CAPITAL E OGONEK -0xC7 U+0112 #LATIN CAPITAL E MACRON -0xC8 U+010C #LATIN CAPITAL C HACEK -0xC9 U+00C9 #LATIN CAPITAL E ACUTE -0xCA U+0179 #LATIN CAPITAL Z ACUTE -0xCB U+0116 #LATIN CAPITAL E DOT ABOVE -0xCC U+0122 #LATIN CAPITAL G CEDILLA -0xCD U+0136 #LATIN CAPITAL K CEDILLA -0xCE U+012A #LATIN CAPITAL I MACRON -0xCF U+013B #LATIN CAPITAL L CEDILLA -0xD0 U+0160 #LATIN CAPITAL S HACEK -0xD1 U+0143 #LATIN CAPITAL N ACUTE -0xD2 U+0145 #LATIN CAPITAL N CEDILLA -0xD3 U+00D3 #LATIN CAPITAL O ACUTE -0xD4 U+014C #LATIN CAPITAL O MACRON -0xD5 U+00D5 #LATIN CAPITAL O TILDE -0xD6 U+00D6 #LATIN CAPITAL O DIAERESIS +0xBA U+0157 #LATIN SMALL LETTER R WITH CEDILLA +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+00BC #VULGAR FRACTION ONE QUARTER +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+00BE #VULGAR FRACTION THREE QUARTERS +0xBF U+00E6 #LATIN SMALL LETTER AE +0xC0 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xC1 U+012E #LATIN CAPITAL LETTER I WITH OGONEK +0xC2 U+0100 #LATIN CAPITAL 
LETTER A WITH MACRON +0xC3 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE +0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 U+00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xC7 U+0112 #LATIN CAPITAL LETTER E WITH MACRON +0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON +0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0xCA U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE +0xCB U+0116 #LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCC U+0122 #LATIN CAPITAL LETTER G WITH CEDILLA +0xCD U+0136 #LATIN CAPITAL LETTER K WITH CEDILLA +0xCE U+012A #LATIN CAPITAL LETTER I WITH MACRON +0xCF U+013B #LATIN CAPITAL LETTER L WITH CEDILLA +0xD0 U+0160 #LATIN CAPITAL LETTER S WITH CARON +0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xD2 U+0145 #LATIN CAPITAL LETTER N WITH CEDILLA +0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 U+014C #LATIN CAPITAL LETTER O WITH MACRON +0xD5 U+00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS 0xD7 U+00D7 #MULTIPLICATION SIGN -0xD8 U+0172 #LATIN CAPITAL U OGONEK -0xD9 U+0141 #LATIN CAPITAL L STROKE -0xDA U+015A #LATIN CAPITAL S ACUTE -0xDB U+016A #LATIN CAPITAL U MACRON -0xDC U+00DC #LATIN CAPITAL U DIAERESIS -0xDD U+017B #LATIN CAPITAL Z DOT ABOVE -0xDE U+017D #LATIN CAPITAL Z HACEK -0xDF U+00DF #LATIN SMALL SHARP SS -0xE0 U+0105 #LATIN SMALL A OGONEK -0xE1 U+012F #LATIN SMALL I OGONEK -0xE2 U+0101 #LATIN SMALL A MACRON -0xE3 U+0107 #LATIN SMALL C ACUTE -0xE4 U+00E4 #LATIN SMALL A DIAERESIS -0xE5 U+00E5 #LATIN SMALL A RING ABOVE -0xE6 U+0119 #LATIN SMALL E OGONEK -0xE7 U+0113 #LATIN SMALL E MACRON -0xE8 U+010D #LATIN SMALL C HACEK -0xE9 U+00E9 #LATIN SMALL E ACUTE -0xEA U+017A #LATIN SMALL Z ACUTE -0xEB U+0117 #LATIN SMALL E DOT ABOVE -0xEC U+0123 #LATIN SMALL G CEDILLA -0xED U+0137 #LATIN SMALL K CEDILLA -0xEE U+012B #LATIN SMALL I MACRON -0xEF U+013C #LATIN SMALL L CEDILLA -0xF0 U+0161 #LATIN SMALL S HACEK -0xF1 U+0144 #LATIN SMALL N ACUTE -0xF2 U+0146 #LATIN 
SMALL N CEDILLA -0xF3 U+00F3 #LATIN SMALL O ACUTE -0xF4 U+014D #LATIN SMALL O MACRON -0xF5 U+00F5 #LATIN SMALL O TILDE -0xF6 U+00F6 #LATIN SMALL O DIAERESIS +0xD8 U+0172 #LATIN CAPITAL LETTER U WITH OGONEK +0xD9 U+0141 #LATIN CAPITAL LETTER L WITH STROKE +0xDA U+015A #LATIN CAPITAL LETTER S WITH ACUTE +0xDB U+016A #LATIN CAPITAL LETTER U WITH MACRON +0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xDE U+017D #LATIN CAPITAL LETTER Z WITH CARON +0xDF U+00DF #LATIN SMALL LETTER SHARP S +0xE0 U+0105 #LATIN SMALL LETTER A WITH OGONEK +0xE1 U+012F #LATIN SMALL LETTER I WITH OGONEK +0xE2 U+0101 #LATIN SMALL LETTER A WITH MACRON +0xE3 U+0107 #LATIN SMALL LETTER C WITH ACUTE +0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 U+00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0xE6 U+0119 #LATIN SMALL LETTER E WITH OGONEK +0xE7 U+0113 #LATIN SMALL LETTER E WITH MACRON +0xE8 U+010D #LATIN SMALL LETTER C WITH CARON +0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA U+017A #LATIN SMALL LETTER Z WITH ACUTE +0xEB U+0117 #LATIN SMALL LETTER E WITH DOT ABOVE +0xEC U+0123 #LATIN SMALL LETTER G WITH CEDILLA +0xED U+0137 #LATIN SMALL LETTER K WITH CEDILLA +0xEE U+012B #LATIN SMALL LETTER I WITH MACRON +0xEF U+013C #LATIN SMALL LETTER L WITH CEDILLA +0xF0 U+0161 #LATIN SMALL LETTER S WITH CARON +0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE +0xF2 U+0146 #LATIN SMALL LETTER N WITH CEDILLA +0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE +0xF4 U+014D #LATIN SMALL LETTER O WITH MACRON +0xF5 U+00F5 #LATIN SMALL LETTER O WITH TILDE +0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS 0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0173 #LATIN SMALL U OGONEK -0xF9 U+0142 #LATIN SMALL L STROKE -0xFA U+015B #LATIN SMALL S ACUTE -0xFB U+016B #LATIN SMALL U MACRON -0xFC U+00FC #LATIN SMALL U DIAERESIS -0xFD U+017C #LATIN SMALL Z DOT ABOVE -0xFE U+017E #LATIN SMALL Z HACEK +0xF8 U+0173 #LATIN SMALL LETTER U WITH OGONEK +0xF9 U+0142 #LATIN SMALL LETTER L WITH 
STROKE +0xFA U+015B #LATIN SMALL LETTER S WITH ACUTE +0xFB U+016B #LATIN SMALL LETTER U WITH MACRON +0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE +0xFE U+017E #LATIN SMALL LETTER Z WITH CARON 0xFF U+02D9 #DOT ABOVE + diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl index 621e730e..b84f503f 100644 --- a/src/chrtrans/cp437_uni.tbl +++ b/src/chrtrans/cp437_uni.tbl @@ -6,20 +6,21 @@ D0 Mcp437 #Name as a Display Charset (used on Options screen) -ODosLatinUS (cp437) +OIBM PC US codepage (cp437) + # # Name: cp437_DOSLatinUS to Unicode table -# Unicode version: 1.1 -# Table version: 1.1 +# Unicode version: 2.0 +# Table version: 2.00 # Table format: Format A -# Date: 03/31/95 -# Authors: Michel Suignard <michelsu@microsoft.com> -# Lori Hoerth <lorih@microsoft.com> +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp437_DosLatinUS code (in hex) -# Column #2 is the Unicode (in hex as U+XXXX) +# Column #1 is the cp437_DOSLatinUS code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # # The entries are in cp437_DosLatinUS order @@ -27,6 +28,8 @@ ODosLatinUS (cp437) # some mapppings of greek letters to latin letters added, # just for fun.. 
- KW # +####################################### + 0x20-0x7f idem # 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA diff --git a/src/chrtrans/cp737_uni.tbl b/src/chrtrans/cp737_uni.tbl index 32f01c8c..81442ccc 100644 --- a/src/chrtrans/cp737_uni.tbl +++ b/src/chrtrans/cp737_uni.tbl @@ -2,7 +2,7 @@ Mcp737 #Name as a Display Charset (used on Options screen) -ODosGreek (cp737) +OGreek (cp737) # # Name: cp737_DOSGreek to Unicode table diff --git a/src/chrtrans/cp775_uni.tbl b/src/chrtrans/cp775_uni.tbl index 02a8b61b..4d63208b 100644 --- a/src/chrtrans/cp775_uni.tbl +++ b/src/chrtrans/cp775_uni.tbl @@ -2,7 +2,7 @@ Mcp775 #Name as a Display Charset (used on Options screen) -ODosBaltRim (cp775) +OBaltic Rim (cp775) # Name: cp775_DOSBaltRim to Unicode table # Unicode version: 2.0 @@ -20,134 +20,10 @@ ODosBaltRim (cp775) # # The entries are in cp775_DOSBaltRim order # -#0x00 U+0000 #NULL -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0a U+000a #LINE FEED -#0x0b U+000b #VERTICAL TABULATION -#0x0c U+000c #FORM FEED -#0x0d U+000d #CARRIAGE RETURN -#0x0e U+000e #SHIFT OUT -#0x0f U+000f #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1a U+001a #SUBSTITUTE -#0x1b U+001b #ESCAPE -#0x1c U+001c #FILE SEPARATOR -#0x1d U+001d #GROUP SEPARATOR -#0x1e U+001e #RECORD SEPARATOR -#0x1f U+001f #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND 
-0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2a U+002a #ASTERISK -0x2b U+002b #PLUS SIGN -0x2c U+002c #COMMA -0x2d U+002d #HYPHEN-MINUS -0x2e U+002e #FULL STOP -0x2f U+002f #SOLIDUS -0x30 U+0030 #DIGIT ZERO -0x31 U+0031 #DIGIT ONE -0x32 U+0032 #DIGIT TWO -0x33 U+0033 #DIGIT THREE -0x34 U+0034 #DIGIT FOUR -0x35 U+0035 #DIGIT FIVE -0x36 U+0036 #DIGIT SIX -0x37 U+0037 #DIGIT SEVEN -0x38 U+0038 #DIGIT EIGHT -0x39 U+0039 #DIGIT NINE -0x3a U+003a #COLON -0x3b U+003b #SEMICOLON -0x3c U+003c #LESS-THAN SIGN -0x3d U+003d #EQUALS SIGN -0x3e U+003e #GREATER-THAN SIGN -0x3f U+003f #QUESTION MARK -0x40 U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL LETTER A -0x42 U+0042 #LATIN CAPITAL LETTER B -0x43 U+0043 #LATIN CAPITAL LETTER C -0x44 U+0044 #LATIN CAPITAL LETTER D -0x45 U+0045 #LATIN CAPITAL LETTER E -0x46 U+0046 #LATIN CAPITAL LETTER F -0x47 U+0047 #LATIN CAPITAL LETTER G -0x48 U+0048 #LATIN CAPITAL LETTER H -0x49 U+0049 #LATIN CAPITAL LETTER I -0x4a U+004a #LATIN CAPITAL LETTER J -0x4b U+004b #LATIN CAPITAL LETTER K -0x4c U+004c #LATIN CAPITAL LETTER L -0x4d U+004d #LATIN CAPITAL LETTER M -0x4e U+004e #LATIN CAPITAL LETTER N -0x4f U+004f #LATIN CAPITAL LETTER O -0x50 U+0050 #LATIN CAPITAL LETTER P -0x51 U+0051 #LATIN CAPITAL LETTER Q -0x52 U+0052 #LATIN CAPITAL LETTER R -0x53 U+0053 #LATIN CAPITAL LETTER S -0x54 U+0054 #LATIN CAPITAL LETTER T -0x55 U+0055 #LATIN CAPITAL LETTER U -0x56 U+0056 #LATIN CAPITAL LETTER V -0x57 U+0057 #LATIN CAPITAL LETTER W -0x58 U+0058 #LATIN CAPITAL LETTER X -0x59 U+0059 #LATIN CAPITAL LETTER Y -0x5a U+005a #LATIN CAPITAL LETTER Z -0x5b U+005b #LEFT SQUARE BRACKET -0x5c U+005c #REVERSE SOLIDUS -0x5d U+005d #RIGHT SQUARE BRACKET -0x5e U+005e #CIRCUMFLEX ACCENT -0x5f U+005f #LOW LINE -0x60 U+0060 #GRAVE ACCENT -0x61 U+0061 #LATIN SMALL LETTER A -0x62 U+0062 #LATIN SMALL LETTER B -0x63 U+0063 #LATIN SMALL LETTER C -0x64 U+0064 #LATIN SMALL LETTER D -0x65 U+0065 #LATIN SMALL LETTER E -0x66 U+0066 
#LATIN SMALL LETTER F -0x67 U+0067 #LATIN SMALL LETTER G -0x68 U+0068 #LATIN SMALL LETTER H -0x69 U+0069 #LATIN SMALL LETTER I -0x6a U+006a #LATIN SMALL LETTER J -0x6b U+006b #LATIN SMALL LETTER K -0x6c U+006c #LATIN SMALL LETTER L -0x6d U+006d #LATIN SMALL LETTER M -0x6e U+006e #LATIN SMALL LETTER N -0x6f U+006f #LATIN SMALL LETTER O -0x70 U+0070 #LATIN SMALL LETTER P -0x71 U+0071 #LATIN SMALL LETTER Q -0x72 U+0072 #LATIN SMALL LETTER R -0x73 U+0073 #LATIN SMALL LETTER S -0x74 U+0074 #LATIN SMALL LETTER T -0x75 U+0075 #LATIN SMALL LETTER U -0x76 U+0076 #LATIN SMALL LETTER V -0x77 U+0077 #LATIN SMALL LETTER W -0x78 U+0078 #LATIN SMALL LETTER X -0x79 U+0079 #LATIN SMALL LETTER Y -0x7a U+007a #LATIN SMALL LETTER Z -0x7b U+007b #LEFT CURLY BRACKET -0x7c U+007c #VERTICAL LINE -0x7d U+007d #RIGHT CURLY BRACKET -0x7e U+007e #TILDE -#0x7f U+007f #DELETE +################## + +0x20-0x7e idem +# 0x80 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE 0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS 0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl index 05685971..af4dbff5 100644 --- a/src/chrtrans/cp850_uni.tbl +++ b/src/chrtrans/cp850_uni.tbl @@ -8,153 +8,29 @@ D0 Mcp850 #Name as a Display Charset (used on Options screen) -ODosLatin1 (cp850) +OWestern (cp850) # # Name: cp850_DOSLatin1 to Unicode table -# Unicode version: 1.1 -# Table version: 1.1 +# Unicode version: 2.0 +# Table version: 2.00 # Table format: Format A -# Date: 03/31/95 -# Authors: Michel Suignard <michelsu@microsoft.com> -# Lori Hoerth <lorih@microsoft.com> +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns # Column #1 is the cp850_DOSLatin1 code (in hex) -# Column #2 is the Unicode (in hex as U+XXXX) +# Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # # The entries are in cp850_DOSLatin1 order # -#0x00 U+0000 #NULL -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0a U+000a #LINE FEED -#0x0b U+000b #VERTICAL TABULATION -#0x0c U+000c #FORM FEED -#0x0d U+000d #CARRIAGE RETURN -#0x0e U+000e #SHIFT OUT -#0x0f U+000f #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1a U+001a #SUBSTITUTE -#0x1b U+001b #ESCAPE -#0x1c U+001c #FILE SEPARATOR -#0x1d U+001d #GROUP SEPARATOR -#0x1e U+001e #RECORD SEPARATOR -#0x1f U+001f #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND -0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2a U+002a #ASTERISK -0x2b U+002b #PLUS SIGN -0x2c U+002c #COMMA -0x2d U+002d #HYPHEN-MINUS -0x2e U+002e #FULL STOP -0x2f U+002f #SOLIDUS -0x30 U+0030 #DIGIT ZERO -0x31 U+0031 #DIGIT ONE -0x32 U+0032 #DIGIT TWO -0x33 U+0033 #DIGIT THREE -0x34 U+0034 #DIGIT FOUR -0x35 U+0035 #DIGIT FIVE -0x36 U+0036 #DIGIT SIX -0x37 U+0037 #DIGIT SEVEN -0x38 U+0038 #DIGIT EIGHT -0x39 U+0039 #DIGIT NINE -0x3a U+003a #COLON -0x3b U+003b #SEMICOLON -0x3c 
U+003c #LESS-THAN SIGN -0x3d U+003d #EQUALS SIGN -0x3e U+003e #GREATER-THAN SIGN -0x3f U+003f #QUESTION MARK -0x40 U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL LETTER A -0x42 U+0042 #LATIN CAPITAL LETTER B -0x43 U+0043 #LATIN CAPITAL LETTER C -0x44 U+0044 #LATIN CAPITAL LETTER D -0x45 U+0045 #LATIN CAPITAL LETTER E -0x46 U+0046 #LATIN CAPITAL LETTER F -0x47 U+0047 #LATIN CAPITAL LETTER G -0x48 U+0048 #LATIN CAPITAL LETTER H -0x49 U+0049 #LATIN CAPITAL LETTER I -0x4a U+004a #LATIN CAPITAL LETTER J -0x4b U+004b #LATIN CAPITAL LETTER K -0x4c U+004c #LATIN CAPITAL LETTER L -0x4d U+004d #LATIN CAPITAL LETTER M -0x4e U+004e #LATIN CAPITAL LETTER N -0x4f U+004f #LATIN CAPITAL LETTER O -0x50 U+0050 #LATIN CAPITAL LETTER P -0x51 U+0051 #LATIN CAPITAL LETTER Q -0x52 U+0052 #LATIN CAPITAL LETTER R -0x53 U+0053 #LATIN CAPITAL LETTER S -0x54 U+0054 #LATIN CAPITAL LETTER T -0x55 U+0055 #LATIN CAPITAL LETTER U -0x56 U+0056 #LATIN CAPITAL LETTER V -0x57 U+0057 #LATIN CAPITAL LETTER W -0x58 U+0058 #LATIN CAPITAL LETTER X -0x59 U+0059 #LATIN CAPITAL LETTER Y -0x5a U+005a #LATIN CAPITAL LETTER Z -0x5b U+005b #LEFT SQUARE BRACKET -0x5c U+005c #REVERSE SOLIDUS -0x5d U+005d #RIGHT SQUARE BRACKET -0x5e U+005e #CIRCUMFLEX ACCENT -0x5f U+005f #LOW LINE -0x60 U+0060 #GRAVE ACCENT -0x61 U+0061 #LATIN SMALL LETTER A -0x62 U+0062 #LATIN SMALL LETTER B -0x63 U+0063 #LATIN SMALL LETTER C -0x64 U+0064 #LATIN SMALL LETTER D -0x65 U+0065 #LATIN SMALL LETTER E -0x66 U+0066 #LATIN SMALL LETTER F -0x67 U+0067 #LATIN SMALL LETTER G -0x68 U+0068 #LATIN SMALL LETTER H -0x69 U+0069 #LATIN SMALL LETTER I -0x6a U+006a #LATIN SMALL LETTER J -0x6b U+006b #LATIN SMALL LETTER K -0x6c U+006c #LATIN SMALL LETTER L -0x6d U+006d #LATIN SMALL LETTER M -0x6e U+006e #LATIN SMALL LETTER N -0x6f U+006f #LATIN SMALL LETTER O -0x70 U+0070 #LATIN SMALL LETTER P -0x71 U+0071 #LATIN SMALL LETTER Q -0x72 U+0072 #LATIN SMALL LETTER R -0x73 U+0073 #LATIN SMALL LETTER S -0x74 U+0074 #LATIN SMALL LETTER T -0x75 U+0075 #LATIN 
SMALL LETTER U -0x76 U+0076 #LATIN SMALL LETTER V -0x77 U+0077 #LATIN SMALL LETTER W -0x78 U+0078 #LATIN SMALL LETTER X -0x79 U+0079 #LATIN SMALL LETTER Y -0x7a U+007a #LATIN SMALL LETTER Z -0x7b U+007b #LEFT CURLY BRACKET -0x7c U+007c #VERTICAL LINE -0x7d U+007d #RIGHT CURLY BRACKET -0x7e U+007e #TILDE -0x7f U+007f #DELETE +################## + +0x20-0x7e idem +# 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA 0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS 0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE diff --git a/src/chrtrans/cp852_uni.tbl b/src/chrtrans/cp852_uni.tbl index 0658d893..946b4996 100644 --- a/src/chrtrans/cp852_uni.tbl +++ b/src/chrtrans/cp852_uni.tbl @@ -2,7 +2,7 @@ Mcp852 #Name as a Display Charset (used on Options screen) -ODosLatin2 (cp852) +OEastern European (cp852) # # Name: cp852_DOSLatin2 to Unicode table @@ -21,260 +21,136 @@ ODosLatin2 (cp852) # # The entries are in cp852_DOSLatin2 order # -#0x00 U+0000 #NULL -#0x01 U+0001 #START OF HEADING -#0x02 U+0002 #START OF TEXT -#0x03 U+0003 #END OF TEXT -#0x04 U+0004 #END OF TRANSMISSION -#0x05 U+0005 #ENQUIRY -#0x06 U+0006 #ACKNOWLEDGE -#0x07 U+0007 #BELL -#0x08 U+0008 #BACKSPACE -#0x09 U+0009 #HORIZONTAL TABULATION -#0x0a U+000a #LINE FEED -#0x0b U+000b #VERTICAL TABULATION -#0x0c U+000c #FORM FEED -#0x0d U+000d #CARRIAGE RETURN -#0x0e U+000e #SHIFT OUT -#0x0f U+000f #SHIFT IN -#0x10 U+0010 #DATA LINK ESCAPE -#0x11 U+0011 #DEVICE CONTROL ONE -#0x12 U+0012 #DEVICE CONTROL TWO -#0x13 U+0013 #DEVICE CONTROL THREE -#0x14 U+0014 #DEVICE CONTROL FOUR -#0x15 U+0015 #NEGATIVE ACKNOWLEDGE -#0x16 U+0016 #SYNCHRONOUS IDLE -#0x17 U+0017 #END OF TRANSMISSION BLOCK -#0x18 U+0018 #CANCEL -#0x19 U+0019 #END OF MEDIUM -#0x1a U+001a #SUBSTITUTE -#0x1b U+001b #ESCAPE -#0x1c U+001c #FILE SEPARATOR -#0x1d U+001d #GROUP SEPARATOR -#0x1e U+001e #RECORD SEPARATOR -#0x1f U+001f #UNIT SEPARATOR -0x20 U+0020 #SPACE -0x21 U+0021 #EXCLAMATION MARK -0x22 U+0022 #QUOTATION MARK -0x23 U+0023 #NUMBER SIGN -0x24 U+0024 #DOLLAR 
SIGN -0x25 U+0025 #PERCENT SIGN -0x26 U+0026 #AMPERSAND -0x27 U+0027 #APOSTROPHE -0x28 U+0028 #LEFT PARENTHESIS -0x29 U+0029 #RIGHT PARENTHESIS -0x2a U+002a #ASTERISK -0x2b U+002b #PLUS SIGN -0x2c U+002c #COMMA -0x2d U+002d #HYPHEN-MINUS -0x2e U+002e #FULL STOP -0x2f U+002f #SOLIDUS -0x30 U+0030 #DIGIT ZERO -0x31 U+0031 #DIGIT ONE -0x32 U+0032 #DIGIT TWO -0x33 U+0033 #DIGIT THREE -0x34 U+0034 #DIGIT FOUR -0x35 U+0035 #DIGIT FIVE -0x36 U+0036 #DIGIT SIX -0x37 U+0037 #DIGIT SEVEN -0x38 U+0038 #DIGIT EIGHT -0x39 U+0039 #DIGIT NINE -0x3a U+003a #COLON -0x3b U+003b #SEMICOLON -0x3c U+003c #LESS-THAN SIGN -0x3d U+003d #EQUALS SIGN -0x3e U+003e #GREATER-THAN SIGN -0x3f U+003f #QUESTION MARK -0x40 U+0040 #COMMERCIAL AT -0x41 U+0041 #LATIN CAPITAL LETTER A -0x42 U+0042 #LATIN CAPITAL LETTER B -0x43 U+0043 #LATIN CAPITAL LETTER C -0x44 U+0044 #LATIN CAPITAL LETTER D -0x45 U+0045 #LATIN CAPITAL LETTER E -0x46 U+0046 #LATIN CAPITAL LETTER F -0x47 U+0047 #LATIN CAPITAL LETTER G -0x48 U+0048 #LATIN CAPITAL LETTER H -0x49 U+0049 #LATIN CAPITAL LETTER I -0x4a U+004a #LATIN CAPITAL LETTER J -0x4b U+004b #LATIN CAPITAL LETTER K -0x4c U+004c #LATIN CAPITAL LETTER L -0x4d U+004d #LATIN CAPITAL LETTER M -0x4e U+004e #LATIN CAPITAL LETTER N -0x4f U+004f #LATIN CAPITAL LETTER O -0x50 U+0050 #LATIN CAPITAL LETTER P -0x51 U+0051 #LATIN CAPITAL LETTER Q -0x52 U+0052 #LATIN CAPITAL LETTER R -0x53 U+0053 #LATIN CAPITAL LETTER S -0x54 U+0054 #LATIN CAPITAL LETTER T -0x55 U+0055 #LATIN CAPITAL LETTER U -0x56 U+0056 #LATIN CAPITAL LETTER V -0x57 U+0057 #LATIN CAPITAL LETTER W -0x58 U+0058 #LATIN CAPITAL LETTER X -0x59 U+0059 #LATIN CAPITAL LETTER Y -0x5a U+005a #LATIN CAPITAL LETTER Z -0x5b U+005b #LEFT SQUARE BRACKET -0x5c U+005c #REVERSE SOLIDUS -0x5d U+005d #RIGHT SQUARE BRACKET -0x5e U+005e #CIRCUMFLEX ACCENT -0x5f U+005f #LOW LINE -0x60 U+0060 #GRAVE ACCENT -0x61 U+0061 #LATIN SMALL LETTER A -0x62 U+0062 #LATIN SMALL LETTER B -0x63 U+0063 #LATIN SMALL LETTER C -0x64 U+0064 #LATIN SMALL 
LETTER D -0x65 U+0065 #LATIN SMALL LETTER E -0x66 U+0066 #LATIN SMALL LETTER F -0x67 U+0067 #LATIN SMALL LETTER G -0x68 U+0068 #LATIN SMALL LETTER H -0x69 U+0069 #LATIN SMALL LETTER I -0x6a U+006a #LATIN SMALL LETTER J -0x6b U+006b #LATIN SMALL LETTER K -0x6c U+006c #LATIN SMALL LETTER L -0x6d U+006d #LATIN SMALL LETTER M -0x6e U+006e #LATIN SMALL LETTER N -0x6f U+006f #LATIN SMALL LETTER O -0x70 U+0070 #LATIN SMALL LETTER P -0x71 U+0071 #LATIN SMALL LETTER Q -0x72 U+0072 #LATIN SMALL LETTER R -0x73 U+0073 #LATIN SMALL LETTER S -0x74 U+0074 #LATIN SMALL LETTER T -0x75 U+0075 #LATIN SMALL LETTER U -0x76 U+0076 #LATIN SMALL LETTER V -0x77 U+0077 #LATIN SMALL LETTER W -0x78 U+0078 #LATIN SMALL LETTER X -0x79 U+0079 #LATIN SMALL LETTER Y -0x7a U+007a #LATIN SMALL LETTER Z -0x7b U+007b #LEFT CURLY BRACKET -0x7c U+007c #VERTICAL LINE -0x7d U+007d #RIGHT CURLY BRACKET -0x7e U+007e #TILDE -#0x7f U+007f #DELETE -0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA -0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS -0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE -0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX -0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS -0x85 U+016f #LATIN SMALL LETTER U WITH RING ABOVE -0x86 U+0107 #LATIN SMALL LETTER C WITH ACUTE -0x87 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA -0x88 U+0142 #LATIN SMALL LETTER L WITH STROKE -0x89 U+00eb #LATIN SMALL LETTER E WITH DIAERESIS -0x8a U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0x8b U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE -0x8c U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX -0x8d U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE -0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0x8f U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE -0x91 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE -0x92 U+013a #LATIN SMALL LETTER L WITH ACUTE -0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0x94 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS -0x95 U+013d #LATIN CAPITAL LETTER L WITH 
CARON -0x96 U+013e #LATIN SMALL LETTER L WITH CARON -0x97 U+015a #LATIN CAPITAL LETTER S WITH ACUTE -0x98 U+015b #LATIN SMALL LETTER S WITH ACUTE -0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS -0x9b U+0164 #LATIN CAPITAL LETTER T WITH CARON -0x9c U+0165 #LATIN SMALL LETTER T WITH CARON -0x9d U+0141 #LATIN CAPITAL LETTER L WITH STROKE -0x9e U+00d7 #MULTIPLICATION SIGN -0x9f U+010d #LATIN SMALL LETTER C WITH CARON -0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE -0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE -0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE -0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE -0xa4 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK -0xa5 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xa6 U+017d #LATIN CAPITAL LETTER Z WITH CARON -0xa7 U+017e #LATIN SMALL LETTER Z WITH CARON -0xa8 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK -0xa9 U+0119 #LATIN SMALL LETTER E WITH OGONEK -0xaa U+00ac #NOT SIGN -0xab U+017a #LATIN SMALL LETTER Z WITH ACUTE -0xac U+010c #LATIN CAPITAL LETTER C WITH CARON -0xad U+015f #LATIN SMALL LETTER S WITH CEDILLA -0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xb0 U+2591 #LIGHT SHADE -0xb1 U+2592 #MEDIUM SHADE -0xb2 U+2593 #DARK SHADE -0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL -0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT -0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE -0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xb7 U+011a #LATIN CAPITAL LETTER E WITH CARON -0xb8 U+015e #LATIN CAPITAL LETTER S WITH CEDILLA -0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT -0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL -0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT -0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT -0xbd U+017b #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xbe U+017c #LATIN SMALL LETTER Z WITH DOT ABOVE -0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT -0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT -0xc1 U+2534 #BOX 
DRAWINGS LIGHT UP AND HORIZONTAL -0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL -0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT -0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL -0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL -0xc6 U+0102 #LATIN CAPITAL LETTER A WITH BREVE -0xc7 U+0103 #LATIN SMALL LETTER A WITH BREVE -0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT -0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT -0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL -0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL -0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT -0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL -0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL -0xcf U+00a4 #CURRENCY SIGN -0xd0 U+0111 #LATIN SMALL LETTER D WITH STROKE -0xd1 U+0110 #LATIN CAPITAL LETTER D WITH STROKE -0xd2 U+010e #LATIN CAPITAL LETTER D WITH CARON -0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS -0xd4 U+010f #LATIN SMALL LETTER D WITH CARON -0xd5 U+0147 #LATIN CAPITAL LETTER N WITH CARON -0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE -0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xd8 U+011b #LATIN SMALL LETTER E WITH CARON -0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT -0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT -0xdb U+2588 #FULL BLOCK -0xdc U+2584 #LOWER HALF BLOCK -0xdd U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA -0xde U+016e #LATIN CAPITAL LETTER U WITH RING ABOVE -0xdf U+2580 #UPPER HALF BLOCK -0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE -0xe1 U+00df #LATIN SMALL LETTER SHARP S -0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xe3 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE -0xe4 U+0144 #LATIN SMALL LETTER N WITH ACUTE -0xe5 U+0148 #LATIN SMALL LETTER N WITH CARON -0xe6 U+0160 #LATIN CAPITAL LETTER S WITH CARON -0xe7 U+0161 #LATIN SMALL LETTER S WITH CARON -0xe8 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE -0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE -0xea U+0155 #LATIN SMALL LETTER R WITH ACUTE -0xeb U+0170 #LATIN CAPITAL LETTER U WITH 
DOUBLE ACUTE -0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE -0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE -0xee U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xef U+00b4 #ACUTE ACCENT -0xf0 U+00ad #SOFT HYPHEN -0xf1 U+02dd #DOUBLE ACUTE ACCENT -0xf2 U+02db #OGONEK -0xf3 U+02c7 #CARON -0xf4 U+02d8 #BREVE -0xf5 U+00a7 #SECTION SIGN -0xf6 U+00f7 #DIVISION SIGN -0xf7 U+00b8 #CEDILLA -0xf8 U+00b0 #DEGREE SIGN -0xf9 U+00a8 #DIAERESIS -0xfa U+02d9 #DOT ABOVE -0xfb U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE -0xfc U+0158 #LATIN CAPITAL LETTER R WITH CARON -0xfd U+0159 #LATIN SMALL LETTER R WITH CARON -0xfe U+25a0 #BLACK SQUARE -0xff U+00a0 #NO-BREAK SPACE +################## + +0x20-0x7e idem +# +0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS +0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE +0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0x85 U+016f #LATIN SMALL LETTER U WITH RING ABOVE +0x86 U+0107 #LATIN SMALL LETTER C WITH ACUTE +0x87 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA +0x88 U+0142 #LATIN SMALL LETTER L WITH STROKE +0x89 U+00eb #LATIN SMALL LETTER E WITH DIAERESIS +0x8a U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0x8b U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE +0x8c U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x8d U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE +0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x8f U+0106 #LATIN CAPITAL LETTER C WITH ACUTE +0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x91 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE +0x92 U+013a #LATIN SMALL LETTER L WITH ACUTE +0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0x94 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0x95 U+013d #LATIN CAPITAL LETTER L WITH CARON +0x96 U+013e #LATIN SMALL LETTER L WITH CARON +0x97 U+015a #LATIN CAPITAL LETTER S WITH ACUTE +0x98 U+015b #LATIN SMALL LETTER S WITH ACUTE +0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0x9a U+00dc #LATIN 
CAPITAL LETTER U WITH DIAERESIS +0x9b U+0164 #LATIN CAPITAL LETTER T WITH CARON +0x9c U+0165 #LATIN SMALL LETTER T WITH CARON +0x9d U+0141 #LATIN CAPITAL LETTER L WITH STROKE +0x9e U+00d7 #MULTIPLICATION SIGN +0x9f U+010d #LATIN SMALL LETTER C WITH CARON +0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xa5 U+0105 #LATIN SMALL LETTER A WITH OGONEK +0xa6 U+017d #LATIN CAPITAL LETTER Z WITH CARON +0xa7 U+017e #LATIN SMALL LETTER Z WITH CARON +0xa8 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xa9 U+0119 #LATIN SMALL LETTER E WITH OGONEK +0xaa U+00ac #NOT SIGN +0xab U+017a #LATIN SMALL LETTER Z WITH ACUTE +0xac U+010c #LATIN CAPITAL LETTER C WITH CARON +0xad U+015f #LATIN SMALL LETTER S WITH CEDILLA +0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xb7 U+011a #LATIN CAPITAL LETTER E WITH CARON +0xb8 U+015e #LATIN CAPITAL LETTER S WITH CEDILLA +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+017b #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xbe U+017c #LATIN SMALL LETTER Z WITH DOT ABOVE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL 
AND HORIZONTAL +0xc6 U+0102 #LATIN CAPITAL LETTER A WITH BREVE +0xc7 U+0103 #LATIN SMALL LETTER A WITH BREVE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+00a4 #CURRENCY SIGN +0xd0 U+0111 #LATIN SMALL LETTER D WITH STROKE +0xd1 U+0110 #LATIN CAPITAL LETTER D WITH STROKE +0xd2 U+010e #LATIN CAPITAL LETTER D WITH CARON +0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS +0xd4 U+010f #LATIN SMALL LETTER D WITH CARON +0xd5 U+0147 #LATIN CAPITAL LETTER N WITH CARON +0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xd8 U+011b #LATIN SMALL LETTER E WITH CARON +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA +0xde U+016e #LATIN CAPITAL LETTER U WITH RING ABOVE +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe1 U+00df #LATIN SMALL LETTER SHARP S +0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xe3 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xe4 U+0144 #LATIN SMALL LETTER N WITH ACUTE +0xe5 U+0148 #LATIN SMALL LETTER N WITH CARON +0xe6 U+0160 #LATIN CAPITAL LETTER S WITH CARON +0xe7 U+0161 #LATIN SMALL LETTER S WITH CARON +0xe8 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE +0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE +0xea U+0155 #LATIN SMALL LETTER R WITH ACUTE +0xeb U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE +0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE +0xee U+0163 #LATIN SMALL LETTER T WITH CEDILLA +0xef U+00b4 #ACUTE ACCENT +0xf0 U+00ad #SOFT HYPHEN +0xf1 U+02dd 
#DOUBLE ACUTE ACCENT +0xf2 U+02db #OGONEK +0xf3 U+02c7 #CARON +0xf4 U+02d8 #BREVE +0xf5 U+00a7 #SECTION SIGN +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+00b8 #CEDILLA +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+00a8 #DIAERESIS +0xfa U+02d9 #DOT ABOVE +0xfb U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xfc U+0158 #LATIN CAPITAL LETTER R WITH CARON +0xfd U+0159 #LATIN SMALL LETTER R WITH CARON +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE diff --git a/src/chrtrans/cp862_uni.tbl b/src/chrtrans/cp862_uni.tbl index 3d21c138..60d9692e 100644 --- a/src/chrtrans/cp862_uni.tbl +++ b/src/chrtrans/cp862_uni.tbl @@ -2,7 +2,7 @@ Mcp862 #Name as a Display Charset (used on Options screen). -ODosHebrew (cp862) +OHebrew (cp862) # Name: cp862_DOSHebrew to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp864_uni.tbl b/src/chrtrans/cp864_uni.tbl index d6e92431..8411f8b7 100644 --- a/src/chrtrans/cp864_uni.tbl +++ b/src/chrtrans/cp864_uni.tbl @@ -2,7 +2,7 @@ Mcp864 #Name as a Display Charset (used on Options screen). 
-ODosArabic (cp864) +OArabic (cp864) # Name: cp864_DOSArabic to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp866_uni.tbl b/src/chrtrans/cp866_uni.tbl index 2d02ce1b..55ce0718 100644 --- a/src/chrtrans/cp866_uni.tbl +++ b/src/chrtrans/cp866_uni.tbl @@ -3,7 +3,7 @@ Mcp866 #Name as a Display Charset (used on Options screen) -ODosCyrillic (cp866) +OCyrillic (cp866) # # Name: cp866_DOSCyrillicRussian to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp869_uni.tbl b/src/chrtrans/cp869_uni.tbl index 21cdeb95..1f418728 100644 --- a/src/chrtrans/cp869_uni.tbl +++ b/src/chrtrans/cp869_uni.tbl @@ -2,7 +2,7 @@ Mcp869 #Name as a Display Charset (used on Options screen) -ODosGreek2 (cp869) +OGreek2 (cp869) # Name: cp869_DOSGreek2 to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index 66a63f76..bd6b1b1c 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -1350,13 +1350,13 @@ U+208c:_= U+208d:( U+208e:) # Old euro currency sign glyph: -U+20A0:CE +#U+20A0:CE U+20a3:Ff U+20a4:Li U+20a7:Pt U+20a9:W= -# New euro currency sign glyph ? 
-# U+20AC:EUR +# New euro currency sign glyph: +U+20AC:EUR U+2103:oC U+2105:c/o U+2109:oF @@ -1754,6 +1754,7 @@ U+266e:Mx U+266f:# 0x58 U+2713 U+2717 # check marks -> x U+2720:-X +# CJK area: 0x20 U+3000 # ideographic space U+3001:,_ U+3002:._ @@ -2014,6 +2015,17 @@ U+3229:10c U+327f:KSC U+33c2:am U+33d8:pm +# +# +#There are four special ranges of characters that are represented only by +#their start and end characters <...> +# +# The CJK Ideographs Area (U+4E00 - U+9FFF) +# The Hangul Syllables Area (U+AC00 - U+D7A3) +# The Surrogates Area (U+D800 - U+DFFF) +# The Private Use Area (U+E000 - U+F8FF) +# +# U+fb00:ff U+fb01:fi U+fb02:fl @@ -2172,12 +2184,12 @@ U+001d:GS U+001e:RS U+001f:US U+007f:DT -U+0080:PA -U+0081:HO -# Most of these characters (82-9F) may be inflicted on us +# Most of these characters (80-9F) may be inflicted on us # by MS FrontPages which uses Unicode notation such as ™ # but there are no assigned letters in Unicode 128-159 range. # It is assumed in the code that those codepoints are from windows-1252. +#U+0080:PA +#U+0081:HO #U+0082:BH #U+0083:NH #U+0084:IN @@ -2189,10 +2201,10 @@ U+0081:HO #U+008a:VS #U+008b:PD #U+008c:PU -U+008d:RI -U+008e:SS2 -U+008f:SS3 -U+0090:DCS +#U+008d:RI +#U+008e:SS2 +#U+008f:SS3 +#U+0090:DCS #U+0091:P1 #U+0092:P2 #U+0093:TS @@ -2202,15 +2214,13 @@ U+0090:DCS #U+0097:EG #U+0098:SS #U+0099:GC -U+009a:SC +#U+009a:SC #U+009b:CSI #U+009c:ST #U+009d:OC #U+009e:PM #U+009f:AC -# Characters in Private Use Area (e000-f8ff) do not have ussigned numbers. - # Let's try to show a question mark for character that cannot # be shown. U+fffd is used for invalid characters. # It works, but let's stick with UHHH representatiion. 
- FM diff --git a/src/chrtrans/iso01_uni.tbl b/src/chrtrans/iso01_uni.tbl index f792164d..5b697e02 100644 --- a/src/chrtrans/iso01_uni.tbl +++ b/src/chrtrans/iso01_uni.tbl @@ -8,7 +8,7 @@ D0 Miso-8859-1 #Name as a Display Charset (used on Options screen) -OISO Latin 1 +OWestern (ISO-8859-1) # # Name: ISO 8859-1 (1987) to Unicode diff --git a/src/chrtrans/iso02_uni.tbl b/src/chrtrans/iso02_uni.tbl index af97bc55..7fa0df92 100644 --- a/src/chrtrans/iso02_uni.tbl +++ b/src/chrtrans/iso02_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-2 #Name as a Display Charset (used on Options screen) -OISO Latin 2 +OEastern European (ISO-8859-2) # # Name: ISO 8859-2 (1987) to Unicode diff --git a/src/chrtrans/iso03_uni.tbl b/src/chrtrans/iso03_uni.tbl index bb8cd90f..a3c1f07a 100644 --- a/src/chrtrans/iso03_uni.tbl +++ b/src/chrtrans/iso03_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-3 #Name as a Display Charset (used on Options screen) -OISO Latin 3 +OLatin 3 (ISO-8859-3) # # Name: ISO 8859-3 (1988) to Unicode diff --git a/src/chrtrans/iso04_uni.tbl b/src/chrtrans/iso04_uni.tbl index 3f54afda..29be0be5 100644 --- a/src/chrtrans/iso04_uni.tbl +++ b/src/chrtrans/iso04_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-4 #Name as a Display Charset (used on Options screen) -OISO Latin 4 +OLatin 4 (ISO-8859-4) # # Name: ISO 8859-4 (1988) to Unicode diff --git a/src/chrtrans/iso05_uni.tbl b/src/chrtrans/iso05_uni.tbl index 40cdc24d..a715b64f 100644 --- a/src/chrtrans/iso05_uni.tbl +++ b/src/chrtrans/iso05_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-5 #Name as a Display Charset (used on Options screen) -OISO 8859-5 Cyrillic +OCyrillic (ISO-8859-5) # # Name: ISO 8859-5 (1988) to Unicode diff --git a/src/chrtrans/iso06_uni.tbl b/src/chrtrans/iso06_uni.tbl index c9418864..549b592d 100644 --- a/src/chrtrans/iso06_uni.tbl +++ b/src/chrtrans/iso06_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-6 #Name as a Display Charset (used on Options screen). 
-OISO 8859-6 Arabic +OArabic (ISO-8859-6) # # Name: ISO 8859-6 (1987) to Unicode diff --git a/src/chrtrans/iso07_uni.tbl b/src/chrtrans/iso07_uni.tbl index 368209bf..dffca758 100644 --- a/src/chrtrans/iso07_uni.tbl +++ b/src/chrtrans/iso07_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-7 #Name as a Display Charset (used on Options screen) -OISO 8859-7 Greek +OGreek (ISO-8859-7) # # Name: ISO 8859-7 (1987) to Unicode diff --git a/src/chrtrans/iso08_uni.tbl b/src/chrtrans/iso08_uni.tbl index 4d83f5c4..050be29d 100644 --- a/src/chrtrans/iso08_uni.tbl +++ b/src/chrtrans/iso08_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-8 #Name as a Display Charset (used on Options screen). -OISO 8859-8 Hebrew +OHebrew (ISO-8859-8) # # Name: ISO 8859-8 (1988) to Unicode diff --git a/src/chrtrans/iso09_uni.tbl b/src/chrtrans/iso09_uni.tbl index 1b204835..5abe799f 100644 --- a/src/chrtrans/iso09_uni.tbl +++ b/src/chrtrans/iso09_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-9 #Name as a Display Charset (used on Options screen) -OISO 8859-9 (Latin 5) +OTurkish (ISO-8859-9) # # Name: ISO 8859-9 (1989) to Unicode diff --git a/src/chrtrans/iso10_uni.tbl b/src/chrtrans/iso10_uni.tbl index be3c02cd..ab8128f0 100644 --- a/src/chrtrans/iso10_uni.tbl +++ b/src/chrtrans/iso10_uni.tbl @@ -10,7 +10,7 @@ D0 Miso-8859-10 #Name as a Display Charset (used on Options screen) -OISO 8859-10 +OISO-8859-10 0x20-0x7e idem #0x7f U+2302 diff --git a/src/chrtrans/koi8r_uni.tbl b/src/chrtrans/koi8r_uni.tbl index 09e8743c..69eef3c2 100644 --- a/src/chrtrans/koi8r_uni.tbl +++ b/src/chrtrans/koi8r_uni.tbl @@ -1,5 +1,5 @@ # Options screen name for this character set -OKOI8-R Cyrillic +OCyrillic (KOI8-R) # MIME name for this charset Mkoi8-r diff --git a/src/chrtrans/makefile.dos b/src/chrtrans/makefile.dos index 6871b87a..57c45dfc 100644 --- a/src/chrtrans/makefile.dos +++ b/src/chrtrans/makefile.dos @@ -13,7 +13,9 @@ CFLAGS = $(MCFLAGS) CC = gcc MCFLAGS = -O3 -DDOSPATH -DNO_TTYTYP \ --I../../WWW/library/implement -I../../djgpp/tcplib/include \ +-I. 
\ +-I../../WWW/library/implement \ +-I../../djgpp/tcplib/include \ -I../../djgpp/tcplib/include/tcp .SUFFIXES: .tbl @@ -81,6 +83,7 @@ cp1256_uni.h: cp1256_uni.tbl makeuctb.exe cp1257_uni.h: cp1257_uni.tbl makeuctb.exe cp437_uni.h: cp437_uni.tbl makeuctb.exe cp737_uni.h: cp737_uni.tbl makeuctb.exe +cp775_uni.h: cp775_uni.tbl makeuctb.exe cp850_uni.h: cp850_uni.tbl makeuctb.exe cp852_uni.h: cp852_uni.tbl makeuctb.exe cp862_uni.h: cp862_uni.tbl makeuctb.exe diff --git a/src/chrtrans/makefile.in b/src/chrtrans/makefile.in index 7cb02699..31dd3bc0 100644 --- a/src/chrtrans/makefile.in +++ b/src/chrtrans/makefile.in @@ -14,6 +14,13 @@ top_srcdir = @top_srcdir@ srcdir = @srcdir@ VPATH = $(srcdir) +top_builddir = ../.. + +LIBS = @LIBS@ $(SITE_LIBS) +LDFLAGS = @LDFLAGS@ + +INTLLIB = @INTLDIR_MAKE@@INTLLIBS@ + WWWINC = WWW/Library/Implementation SITE_DEFS = # FIXME: set in parent makefile @@ -21,7 +28,8 @@ CC = @CC@ CPP = @CPP@ CFLAGS = @CFLAGS@ CPP_OPTS = @DEFS@ @CPPFLAGS@ \ - -I../.. \ + -I$(top_builddir) \ + -I$(top_srcdir)/intl \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/chrtrans \ -I$(top_srcdir)/$(WWWINC) \ @@ -79,7 +87,7 @@ default: $(FONTMAP_INC) tables: $(TABLES) makeuctb$x: makeuctb.o - $(CC) $(CC_OPTS) -o $@ makeuctb.o + $(CC) $(CC_OPTS) -o $@ makeuctb.o $(INTLLIB) $(LIBS) makeuctb.o: $(srcdir)/UCkd.h $(srcdir)/makeuctb.c @@ -107,6 +115,7 @@ cp1256_uni.h: $(srcdir)/cp1256_uni.tbl makeuctb$x cp1257_uni.h: $(srcdir)/cp1257_uni.tbl makeuctb$x cp437_uni.h: $(srcdir)/cp437_uni.tbl makeuctb$x cp737_uni.h: $(srcdir)/cp737_uni.tbl makeuctb$x +cp775_uni.h: $(srcdir)/cp775_uni.tbl makeuctb$x cp850_uni.h: $(srcdir)/cp850_uni.tbl makeuctb$x cp852_uni.h: $(srcdir)/cp852_uni.tbl makeuctb$x cp862_uni.h: $(srcdir)/cp862_uni.tbl makeuctb$x diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index 874d971f..cc721723 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -16,13 +16,7 @@ * version 2, or at your option any later version. 
*/ -#ifdef NOTDEFINED -#include <stdio.h> -#include <stdlib.h> -#include <sysexits.h> -#include <string.h> -#include <ctype.h> -#else +#define DONT_USE_SOCKS5 #include <HTUtils.h> #include <tcp.h> /* @@ -31,7 +25,6 @@ #ifdef exit #undef exit #endif /* exit */ -#endif /* NODEFINED */ #ifndef TOLOWER #define TOLOWER(c) (isupper((unsigned char)c) ? tolower((unsigned char)c) : (c)) @@ -47,6 +40,16 @@ */ typedef u16 unicode; +/* + * Since we're writing the formatted file to stdout, ensure that we flush + * everything before leaving, since some old (and a few not-so-old) platforms + * do not implement POSIX 'exit()'. + */ +#define done(code) \ + fflush(stdout); \ + fflush(stderr); \ + exit(code) + PRIVATE void usage ARGS1( char *, argv0) { @@ -56,7 +59,7 @@ PRIVATE void usage ARGS1( argv0); fprintf(stderr, "Utility to convert .tbl into .h files for Lynx compilation.\n"); - exit(EX_USAGE); + done(EX_USAGE); } /* copied from HTString.c, not everybody has strncasecmp */ @@ -129,7 +132,7 @@ PRIVATE void addpair_str ARGS2( if (!themap_str.entries) { fprintf(stderr, "%s: Out of memory\n", tblname); - exit(EX_DATAERR); + done(EX_DATAERR); } } else { /* @@ -149,7 +152,7 @@ PRIVATE void addpair_str ARGS2( if (themap_str.entry_ct > 1999) { fprintf(stderr, "ERROR: Only 2000 unicode replacement strings permitted!\n"); - exit(EX_DATAERR); + done(EX_DATAERR); } themap_str.entries[themap_str.entry_ct].unicode = un; themap_str.entries[themap_str.entry_ct].replace_str = str; @@ -195,7 +198,7 @@ PRIVATE void addpair ARGS2( */ if (unicount[fp] > 254) { fprintf(stderr, "ERROR: Only 255 unicodes/glyph permitted!\n"); - exit(EX_DATAERR); + done(EX_DATAERR); } unitable[fp][unicount[fp]] = un; unicount[fp]++; @@ -233,7 +236,7 @@ PUBLIC int main ARGS2( ctbl = fopen(tblname = argv[1], "r"); if (!ctbl) { perror(tblname); - exit(EX_NOINPUT); + done(EX_NOINPUT); } } @@ -383,11 +386,11 @@ PUBLIC int main ARGS2( un0 = getunicode(&p); if (un0 < 0) { fprintf(stderr, "Bad input line: %s\n", buffer);
- exit(EX_DATAERR); + done(EX_DATAERR); fprintf(stderr, "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n", tblname, fp0, fp1); - exit(EX_DATAERR); + done(EX_DATAERR); } un1 = un0; while (*p == ' ' || *p == '\t') { @@ -404,7 +407,7 @@ PUBLIC int main ARGS2( "%s: Bad Unicode range U+%x-U+%x\n", tblname, un0, un1); fprintf(stderr, "Bad input line: %s\n", buffer); - exit(EX_DATAERR); + done(EX_DATAERR); } while (*p == ' ' || *p == '\t') { p++; @@ -421,7 +424,7 @@ PUBLIC int main ARGS2( if (!(p1 = tbuf)) { fprintf(stderr, "%s: Out of memory\n", tblname); - exit(EX_DATAERR); + done(EX_DATAERR); } if (*p == '"') { /* @@ -481,7 +484,7 @@ PUBLIC int main ARGS2( fp0 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); - exit(EX_DATAERR); + done(EX_DATAERR); } p = p1; @@ -493,7 +496,7 @@ PUBLIC int main ARGS2( fp1 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); - exit(EX_DATAERR); + done(EX_DATAERR); } p = p1; } else { @@ -504,13 +507,13 @@ PUBLIC int main ARGS2( fprintf(stderr, "%s: Glyph number (0x%x) larger than font length\n", tblname, fp0); - exit(EX_DATAERR); + done(EX_DATAERR); } if (fp1 && (fp1 < fp0 || fp1 >= fontlen)) { fprintf(stderr, "%s: Bad end of range (0x%x)\n", tblname, fp1); - exit(EX_DATAERR); + done(EX_DATAERR); } if (fp1) { @@ -537,7 +540,7 @@ PUBLIC int main ARGS2( tblname); fprintf(stderr, " there should be a Unicode range.\n"); - exit(EX_DATAERR); + done(EX_DATAERR); } p++; un1 = getunicode(&p); @@ -545,7 +548,7 @@ PUBLIC int main ARGS2( fprintf(stderr, "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n", tblname, fp0, fp1); - exit(EX_DATAERR); + done(EX_DATAERR); } if (un1 - un0 != fp1 - fp0) { fprintf(stderr, @@ -554,7 +557,7 @@ PUBLIC int main ARGS2( fprintf(stderr, " as font position range 0x%x-0x%x\n", fp0, fp1); - exit(EX_DATAERR); + done(EX_DATAERR); } for (i = fp0; i <= fp1; i++) { addpair(i,un0-fp0+i); @@ -585,7 +588,7 @@ PUBLIC int 
main ARGS2( fprintf(stderr, "%s: Bad Unicode range 0x%x-0x%x\n", tblname, un0, un1); - exit(EX_DATAERR); + done(EX_DATAERR); } for (un0++; un0 <= un1; un0++) { addpair(fp0, un0); @@ -631,12 +634,14 @@ PUBLIC int main ARGS2( } else if (this_LYNXcharset[0] == '\0') { strncpy(this_LYNXcharset,this_MIMEcharset,UC_MAXLEN_LYNXCSNAME); } +/***** DO NOT produce trailing spaces! if ((i = strlen(this_LYNXcharset)) < UC_LEN_LYNXCSNAME) { for (; i < UC_LEN_LYNXCSNAME; i++) { this_LYNXcharset[i] = ' '; } this_LYNXcharset[i] = '\0'; } +*******/ #ifdef NOTDEFINED fprintf(stderr,"this_MIMEcharset: %s.\n",this_MIMEcharset); fprintf(stderr,"this_LYNXcharset: %s.\n",this_LYNXcharset); @@ -655,8 +660,8 @@ PUBLIC int main ARGS2( p++, i++) { id_append[i+1] = isalnum(*p) ? *p : '_'; } + id_append[i+1] = '\0'; } - id_append[i+1] = '\0'; fprintf(stderr, " (%s).\n", id_append); printf("\ @@ -760,5 +765,5 @@ dfont_replacedesc%s,%d,%d)\n", id_append, this_MIMEcharset, this_LYNXcharset, id_append, id_append, nuni, id_append, lowest_eight, RawOrEnc); - exit(EX_OK); + done(EX_OK); } diff --git a/src/chrtrans/utf8_uni.tbl b/src/chrtrans/utf8_uni.tbl index 67ff3460..9fc470ad 100644 --- a/src/chrtrans/utf8_uni.tbl +++ b/src/chrtrans/utf8_uni.tbl @@ -7,7 +7,7 @@ Mutf-8 #Name as a Display Charset (used on Options screen) -OUNICODE UTF-8 +OUNICODE (UTF-8) # Some kind of raw Unicode? # Use 6 for for really "raw" 16bit UCS-2, 7 for UTF-8, ... |