diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-06 16:18:09 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-06 16:18:09 -0500 |
commit | 18024037b515bfff83e0230b35151babe6005e18 (patch) | |
tree | b8c80055282a00883284722a3dd6bb25234e07f0 /src/chrtrans | |
parent | 3d8ecbe48af249fa0d77ce4d273e32ce4b7e6e18 (diff) | |
download | lynx-snapshots-18024037b515bfff83e0230b35151babe6005e18.tar.gz |
snapshot of project "lynx", label v2-8-1dev_4
Diffstat (limited to 'src/chrtrans')
36 files changed, 1684 insertions, 978 deletions
diff --git a/src/chrtrans/Makefile.old b/src/chrtrans/Makefile.old index 42b49eec..0ca53fc2 100644 --- a/src/chrtrans/Makefile.old +++ b/src/chrtrans/Makefile.old @@ -30,7 +30,6 @@ TABLES= \ cp1257_uni.h \ cp437_uni.h \ cp737_uni.h \ - cp775_uni.h \ cp850_uni.h \ cp852_uni.h \ cp862_uni.h \ @@ -77,7 +76,6 @@ cp1256_uni.h: cp1256_uni.tbl makeuctb cp1257_uni.h: cp1257_uni.tbl makeuctb cp437_uni.h: cp437_uni.tbl makeuctb cp737_uni.h: cp737_uni.tbl makeuctb -cp775_uni.h: cp775_uni.tbl makeuctb cp850_uni.h: cp850_uni.tbl makeuctb cp852_uni.h: cp852_uni.tbl makeuctb cp862_uni.h: cp862_uni.tbl makeuctb diff --git a/src/chrtrans/README.format b/src/chrtrans/README.format index 636afd32..4ced0a14 100644 --- a/src/chrtrans/README.format +++ b/src/chrtrans/README.format @@ -26,7 +26,7 @@ b) directives: start with a keyword which may be abbreviated to one letter (first letter must be capitalized), followed by space and a value. Currently recognized: - + OptionName The name under which this should appear on the O)ptions screen in the list for Display Character Set @@ -53,7 +53,7 @@ c) character translation definitions: 0x41 U+0041 U+0391 ... and are used for "forward" translation (mapping this charset to Unicode) - AS WELL AS "back" translation (mapping Unicodes to an 8-bit + AS WELL AS "back" translation (mapping Unicodes to an 8-bit [incl. 7-bit ASCII] code). For the "forward" direction, only the first Unicode is used; for @@ -63,7 +63,7 @@ c) character translation definitions: The above example line would tell the chartrans mechanism: "For this charset, code position 65 [hex 0x41] contains Unicode U+0041 (LATIN CAPITAL LETTER A). For translation of Unicodes to - this charset, use byte value 65 [hex 0x41] for U+0041 (LATIN CAPITAL + this charset, use byte value 65 [hex 0x41] for U+0041 (LATIN CAPITAL LETTER A) as well as for U+0391 (GREEK CAPITAL LETTER ALPHA)." [Note that for bytes in the ASCII range 0x00-0x7F, the forward translations @@ -82,10 +82,6 @@ c) character translation definitions: * where <range> ::= <fontpos>-<fontpos> * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> - * - [Note that <fontpos> _without_ targets assumed notdefined, - so tables from ftp.unicode.org need no patching.] - d) string replacement definitions: @@ -93,10 +89,10 @@ d) string replacement definitions: U+00cd:I' - which would mean "Replace Unicode U+00cd (LATIN CAPITAL LETTER I WITH + which would mean "Replace Unicode U+00cd (LATIN CAPITAL LETTER I WITH ACUTE" with the string (consisting of two character) I' (if no other translation is available)." Please note that replacement definitions - in certain charset table will override ones from the Default table. + in certnain charset table will override ones from Default table. Note that everything after the ':' is currently taken VERBATIM, so careful with trailing blanks etc. @@ -115,7 +111,7 @@ d) string replacement definitions: Motivation: -- It is an extension of the format already in use for Linux (kernel, +- It is an extention of the format already in use for Linux (kernel, kbd package), those files can be used with some minimal editing. - It is easy to convert Unicode tables for other charsets, as they diff --git a/src/chrtrans/README.tables b/src/chrtrans/README.tables index 5993ccee..be6dac6a 100644 --- a/src/chrtrans/README.tables +++ b/src/chrtrans/README.tables @@ -1,13 +1,12 @@ -The translation table files in this directory were collected from -several sources (among them ftp://ftp.unicode.org, Linux kbd package, -ftp://dkuug.dk/) and are believed to be correct in their mappings, -but not checked in detail. The Unicode/UCS2 values -for some of the RFC 1345 Mnemonic codes are out of date, -a cleanup and update would be needed for serious use. -[See also http://czyborra.com/charsets/iso8859.html for codepages survey.] +The translation table files in this directory are _examples only_. +They were collected from several sources (among them ftp://ftp.unicode.org, +Linux kbd package, ftp://dkuug.dk/) and are believed to be correct +in their mappings, but not checked in detail. The Unicode/UCS2 values +for some of the RFC 1345 Mnemonic codes are out of date, a cleanup and +update would be needed for serious use. More translation files can be easily provided (and new character entities -added to entities.h), this set is just to test whether the system works +added to entities.h), this set is just to test whether the system works in principle (and also how it behaves with incomplete data...) See the file README.format for a brief explanation of what's in the @@ -28,7 +27,7 @@ charset known to Lynx) you currently have to manually edit UCdomap.c, in two places: a) Near the top, you will find a bunch of lines (some may be commented out) - + #include "<fn>.h" Add or comment out as you wish. But it is probably safest to leave the @@ -45,7 +44,7 @@ did under a)...) [The <something> is derived from the charset's MIME name. if in doubt, check the last lines of the corresponding ...uni.h file.] c) To let make automatically notice when you have changed one of the - table files, and automatically regenerate the *uni.h file(s), + table files, and automatically regenerate the *uni.h file(s), you also have to add any new tables to both src/Makefile *and* src/chrtrans/Makefile. Or, for auto-config, the equivalent files named makefile.in before running ./configure, or makefile after running diff --git a/src/chrtrans/UCkd.h b/src/chrtrans/UCkd.h index 104882d7..1e55716e 100644 --- a/src/chrtrans/UCkd.h +++ b/src/chrtrans/UCkd.h @@ -34,7 +34,7 @@ struct unipair { }; struct unipair_str { u16 unicode; - CONST char * replace_str; + char * replace_str; }; struct unimapdesc { u16 entry_ct; diff --git a/src/chrtrans/cp1250_uni.tbl b/src/chrtrans/cp1250_uni.tbl index 49ba9008..41e8581d 100644 --- a/src/chrtrans/cp1250_uni.tbl +++ b/src/chrtrans/cp1250_uni.tbl @@ -2,155 +2,280 @@ Mwindows-1250 #Name as a Display Charset (used on Options screen) -OEastern European (windows-1250) +OWinLatin2 (cp1250) # -# Name: cp1250 to Unicode table +# Name: cp1250_WinLatin2 to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1250 code (in hex) +# Column #1 is the cp1250_WinLatin2 code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1250 order +# The entries are in cp1250_WinLatin2 order # -################## +#0x00 U+0000 #NULL +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0A U+000A #LINE FEED +#0x0B U+000B #VERTICAL TABULATION +#0x0C U+000C #FORM FEED +#0x0D U+000D #CARRIAGE RETURN +#0x0E U+000E #SHIFT OUT +#0x0F U+000F #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1A U+001A #SUBSTITUTE +#0x1B U+001B #ESCAPE +#0x1C U+001C #FILE SEPARATOR +#0x1D U+001D #GROUP SEPARATOR +#0x1E U+001E #RECORD SEPARATOR +#0x1F U+001F #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2A U+002A #ASTERISK +0x2B U+002B #PLUS SIGN +0x2C U+002C #COMMA +0x2D U+002D #HYPHEN-MINUS +0x2E U+002E #FULL STOP +0x2F U+002F #SOLIDUS +0x30 U+0030 #DIGIT ZERO +0x31 U+0031 #DIGIT ONE +0x32 U+0032 #DIGIT TWO +0x33 U+0033 #DIGIT THREE +0x34 U+0034 #DIGIT FOUR +0x35 U+0035 #DIGIT FIVE +0x36 U+0036 #DIGIT SIX +0x37 U+0037 #DIGIT SEVEN +0x38 U+0038 #DIGIT EIGHT +0x39 U+0039 #DIGIT NINE +0x3A U+003A #COLON +0x3B U+003B #SEMICOLON +0x3C U+003C #LESS-THAN SIGN +0x3D U+003D #EQUALS SIGN +0x3E U+003E #GREATER-THAN SIGN +0x3F U+003F #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL LETTER A +0x42 U+0042 #LATIN CAPITAL LETTER B +0x43 U+0043 #LATIN CAPITAL LETTER C +0x44 U+0044 #LATIN CAPITAL LETTER D +0x45 U+0045 #LATIN CAPITAL LETTER E +0x46 U+0046 #LATIN CAPITAL LETTER F +0x47 U+0047 #LATIN CAPITAL LETTER G +0x48 U+0048 #LATIN CAPITAL LETTER H +0x49 U+0049 #LATIN CAPITAL LETTER I +0x4A U+004A #LATIN CAPITAL LETTER J +0x4B U+004B #LATIN CAPITAL LETTER K +0x4C U+004C #LATIN CAPITAL LETTER L +0x4D U+004D #LATIN CAPITAL LETTER M +0x4E U+004E #LATIN CAPITAL LETTER N +0x4F U+004F #LATIN CAPITAL LETTER O +0x50 U+0050 #LATIN CAPITAL LETTER P +0x51 U+0051 #LATIN CAPITAL LETTER Q +0x52 U+0052 #LATIN CAPITAL LETTER R +0x53 U+0053 #LATIN CAPITAL LETTER S +0x54 U+0054 #LATIN CAPITAL LETTER T +0x55 U+0055 #LATIN CAPITAL LETTER U +0x56 U+0056 #LATIN CAPITAL LETTER V +0x57 U+0057 #LATIN CAPITAL LETTER W +0x58 U+0058 #LATIN CAPITAL LETTER X +0x59 U+0059 #LATIN CAPITAL LETTER Y +0x5A U+005A #LATIN CAPITAL LETTER Z +0x5B U+005B #LEFT SQUARE BRACKET +0x5C U+005C #REVERSE SOLIDUS +0x5D U+005D #RIGHT SQUARE BRACKET +0x5E U+005E #CIRCUMFLEX ACCENT +0x5F U+005F #LOW LINE +0x60 U+0060 #GRAVE ACCENT +0x61 U+0061 #LATIN SMALL LETTER A +0x62 U+0062 #LATIN SMALL LETTER B +0x63 U+0063 #LATIN SMALL LETTER C +0x64 U+0064 #LATIN SMALL LETTER D +0x65 U+0065 #LATIN SMALL LETTER E +0x66 U+0066 #LATIN SMALL LETTER F +0x67 U+0067 #LATIN SMALL LETTER G +0x68 U+0068 #LATIN SMALL LETTER H +0x69 U+0069 #LATIN SMALL LETTER I +0x6A U+006A #LATIN SMALL LETTER J +0x6B U+006B #LATIN SMALL LETTER K +0x6C U+006C #LATIN SMALL LETTER L +0x6D U+006D #LATIN SMALL LETTER M +0x6E U+006E #LATIN SMALL LETTER N +0x6F U+006F #LATIN SMALL LETTER O +0x70 U+0070 #LATIN SMALL LETTER P +0x71 U+0071 #LATIN SMALL LETTER Q +0x72 U+0072 #LATIN SMALL LETTER R +0x73 U+0073 #LATIN SMALL LETTER S +0x74 U+0074 #LATIN SMALL LETTER T +0x75 U+0075 #LATIN SMALL LETTER U +0x76 U+0076 #LATIN SMALL LETTER V +0x77 U+0077 #LATIN SMALL LETTER W +0x78 U+0078 #LATIN SMALL LETTER X +0x79 U+0079 #LATIN SMALL LETTER Y +0x7A U+007A #LATIN SMALL LETTER Z +0x7B U+007B #LEFT CURLY BRACKET +0x7C U+007C #VERTICAL LINE +0x7D U+007D #RIGHT CURLY BRACKET +0x7E U+007E #TILDE +#0x7F U+007F #DELETE +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +#0x83 #UNDEFINED +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +#0x88 #UNDEFINED +0x89 U+2030 #PER MILLE SIGN +0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+015A #LATIN CAPITAL LETTER S WITH ACUTE +0x8D U+0164 #LATIN CAPITAL LETTER T WITH CARON +0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON +0x8F U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE +#0x90 #UNDEFINED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +0x9A U+0161 #LATIN SMALL LETTER S WITH CARON +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+015B #LATIN SMALL LETTER S WITH ACUTE +0x9D U+0165 #LATIN SMALL LETTER T WITH CARON +0x9E U+017E #LATIN SMALL LETTER Z WITH CARON +0x9F U+017A #LATIN SMALL LETTER Z WITH ACUTE +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+02C7 #CARON +0xA2 U+02D8 #BREVE +0xA3 U+0141 #LATIN CAPITAL LETTER L WITH STROKE +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +0xAA U+015E #LATIN CAPITAL LETTER S WITH CEDILLA +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +#0xAC #UNDEFINED +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+02DB #OGONEK +0xB3 U+0142 #LATIN SMALL LETTER L WITH STROKE +0xB4 U+00B4 #ACUTE ACCENT +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+00B8 #CEDILLA +0xB9 U+0105 #LATIN SMALL LETTER A WITH OGONEK +0xBA U+015F #LATIN SMALL LETTER S WITH CEDILLA +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+013D #LATIN CAPITAL LETTER L WITH CARON +0xBD U+02DD #DOUBLE ACUTE ACCENT +0xBE U+013E #LATIN SMALL LETTER L WITH CARON +0xBF U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE +0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 U+0102 #LATIN CAPITAL LETTER A WITH BREVE +0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE +0xC6 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE +0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON +0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0xCA U+0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC U+011A #LATIN CAPITAL LETTER E WITH CARON +0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE +0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF U+010E #LATIN CAPITAL LETTER D WITH CARON +0xD0 U+0110 #LATIN CAPITAL LETTER D WITH STROKE +0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xD2 U+0147 #LATIN CAPITAL LETTER N WITH CARON +0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 U+00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 U+00D7 #MULTIPLICATION SIGN +0xD8 U+0158 #LATIN CAPITAL LETTER R WITH CARON +0xD9 U+016E #LATIN CAPITAL LETTER U WITH RING ABOVE +0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xDB U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xDE U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA +0xDF U+00DF #LATIN SMALL LETTER SHARP S +0xE0 U+0155 #LATIN SMALL LETTER R WITH ACUTE +0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE +0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 U+0103 #LATIN SMALL LETTER A WITH BREVE +0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 U+013A #LATIN SMALL LETTER L WITH ACUTE +0xE6 U+0107 #LATIN SMALL LETTER C WITH ACUTE +0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 U+010D #LATIN SMALL LETTER C WITH CARON +0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA U+0119 #LATIN SMALL LETTER E WITH OGONEK +0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC U+011B #LATIN SMALL LETTER E WITH CARON +0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE +0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF U+010F #LATIN SMALL LETTER D WITH CARON +0xF0 U+0111 #LATIN SMALL LETTER D WITH STROKE +0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE +0xF2 U+0148 #LATIN SMALL LETTER N WITH CARON +0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE +0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xF7 U+00F7 #DIVISION SIGN +0xF8 U+0159 #LATIN SMALL LETTER R WITH CARON +0xF9 U+016F #LATIN SMALL LETTER U WITH RING ABOVE +0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE +0xFB U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE +0xFE U+0163 #LATIN SMALL LETTER T WITH CEDILLA +0xFF U+02D9 #DOT ABOVE + -0x20-0x7e idem -# -0x80 U+20AC #EURO SIGN -0x81 #UNDEFINED -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 #UNDEFINED -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -0x88 #UNDEFINED -0x89 U+2030 #PER MILLE SIGN -0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+015A #LATIN CAPITAL LETTER S WITH ACUTE -0x8D U+0164 #LATIN CAPITAL LETTER T WITH CARON -0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON -0x8F U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE -0x90 #UNDEFINED -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -0x98 #UNDEFINED -0x99 U+2122 #TRADE MARK SIGN -0x9A U+0161 #LATIN SMALL LETTER S WITH CARON -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+015B #LATIN SMALL LETTER S WITH ACUTE -0x9D U+0165 #LATIN SMALL LETTER T WITH CARON -0x9E U+017E #LATIN SMALL LETTER Z WITH CARON -0x9F U+017A #LATIN SMALL LETTER Z WITH ACUTE -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+02C7 #CARON -0xA2 U+02D8 #BREVE -0xA3 U+0141 #LATIN CAPITAL LETTER L WITH STROKE -0xA4 U+00A4 #CURRENCY SIGN -0xA5 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK -0xA6 U+00A6 #BROKEN BAR -0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS -0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+015E #LATIN CAPITAL LETTER S WITH CEDILLA -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xAC U+00AC #NOT SIGN -0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+02DB #OGONEK -0xB3 U+0142 #LATIN SMALL LETTER L WITH STROKE -0xB4 U+00B4 #ACUTE ACCENT -0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA -0xB9 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xBA U+015F #LATIN SMALL LETTER S WITH CEDILLA -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+013D #LATIN CAPITAL LETTER L WITH CARON -0xBD U+02DD #DOUBLE ACUTE ACCENT -0xBE U+013E #LATIN SMALL LETTER L WITH CARON -0xBF U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE -0xC0 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE -0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE -0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xC3 U+0102 #LATIN CAPITAL LETTER A WITH BREVE -0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0xC5 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE -0xC6 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA -0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON -0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE -0xCA U+0118 #LATIN CAPITAL LETTER E WITH OGONEK -0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS -0xCC U+011A #LATIN CAPITAL LETTER E WITH CARON -0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE -0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xCF U+010E #LATIN CAPITAL LETTER D WITH CARON -0xD0 U+0110 #LATIN CAPITAL LETTER D WITH STROKE -0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE -0xD2 U+0147 #LATIN CAPITAL LETTER N WITH CARON -0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE -0xD4 U+00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xD5 U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0xD7 U+00D7 #MULTIPLICATION SIGN -0xD8 U+0158 #LATIN CAPITAL LETTER R WITH CARON -0xD9 U+016E #LATIN CAPITAL LETTER U WITH RING ABOVE -0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE -0xDB U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS -0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE -0xDE U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA -0xDF U+00DF #LATIN SMALL LETTER SHARP S -0xE0 U+0155 #LATIN SMALL LETTER R WITH ACUTE -0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE -0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX -0xE3 U+0103 #LATIN SMALL LETTER A WITH BREVE -0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS -0xE5 U+013A #LATIN SMALL LETTER L WITH ACUTE -0xE6 U+0107 #LATIN SMALL LETTER C WITH ACUTE -0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+010D #LATIN SMALL LETTER C WITH CARON -0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE -0xEA U+0119 #LATIN SMALL LETTER E WITH OGONEK -0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS -0xEC U+011B #LATIN SMALL LETTER E WITH CARON -0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE -0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX -0xEF U+010F #LATIN SMALL LETTER D WITH CARON -0xF0 U+0111 #LATIN SMALL LETTER D WITH STROKE -0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE -0xF2 U+0148 #LATIN SMALL LETTER N WITH CARON -0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE -0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0xF5 U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE -0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS -0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0159 #LATIN SMALL LETTER R WITH CARON -0xF9 U+016F #LATIN SMALL LETTER U WITH RING ABOVE -0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE -0xFB U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE -0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS -0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE -0xFE U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xFF U+02D9 #DOT ABOVE diff --git a/src/chrtrans/cp1251_uni.tbl b/src/chrtrans/cp1251_uni.tbl index 541f8640..e9bb9460 100644 --- a/src/chrtrans/cp1251_uni.tbl +++ b/src/chrtrans/cp1251_uni.tbl @@ -2,155 +2,153 @@ Mwindows-1251 #Name as a Display Charset (used on Options screen) -OCyrillic (windows-1251) +OWinCyrillic (cp1251) # -# Name: cp1251 to Unicode table +# Name: cp1251_WinCyrillic to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1251 code (in hex) +# Column #1 is the cp1251_WinCyrillic code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1251 order +# The entries are in cp1251_WinCyrillic order # -################## - -0x20-0x7e idem # -0x80 U+0402 #CYRILLIC CAPITAL LETTER DJE -0x81 U+0403 #CYRILLIC CAPITAL LETTER GJE -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 U+0453 #CYRILLIC SMALL LETTER GJE -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -0x88 U+20AC #EURO SIGN -0x89 U+2030 #PER MILLE SIGN -0x8A U+0409 #CYRILLIC CAPITAL LETTER LJE -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+040A #CYRILLIC CAPITAL LETTER NJE -0x8D U+040C #CYRILLIC CAPITAL LETTER KJE -0x8E U+040B #CYRILLIC CAPITAL LETTER TSHE -0x8F U+040F #CYRILLIC CAPITAL LETTER DZHE -0x90 U+0452 #CYRILLIC SMALL LETTER DJE -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -0x98 #UNDEFINED -0x99 U+2122 #TRADE MARK SIGN -0x9A U+0459 #CYRILLIC SMALL LETTER LJE -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+045A #CYRILLIC SMALL LETTER NJE -0x9D U+045C #CYRILLIC SMALL LETTER KJE -0x9E U+045B #CYRILLIC SMALL LETTER TSHE -0x9F U+045F #CYRILLIC SMALL LETTER DZHE -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+040E #CYRILLIC CAPITAL LETTER SHORT U -0xA2 U+045E #CYRILLIC SMALL LETTER SHORT U -0xA3 U+0408 #CYRILLIC CAPITAL LETTER JE -0xA4 U+00A4 #CURRENCY SIGN -0xA5 U+0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN -0xA6 U+00A6 #BROKEN BAR -0xA7 U+00A7 #SECTION SIGN -0xA8 U+0401 #CYRILLIC CAPITAL LETTER IO -0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xAC U+00AC #NOT SIGN -0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+0407 #CYRILLIC CAPITAL LETTER YI -0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -0xB3 U+0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -0xB4 U+0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN -0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+0451 #CYRILLIC SMALL LETTER IO -0xB9 U+2116 #NUMERO SIGN -0xBA U+0454 #CYRILLIC SMALL LETTER UKRAINIAN IE -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+0458 #CYRILLIC SMALL LETTER JE -0xBD U+0405 #CYRILLIC CAPITAL LETTER DZE -0xBE U+0455 #CYRILLIC SMALL LETTER DZE -0xBF U+0457 #CYRILLIC SMALL LETTER YI -0xC0 U+0410 #CYRILLIC CAPITAL LETTER A -0xC1 U+0411 #CYRILLIC CAPITAL LETTER BE -0xC2 U+0412 #CYRILLIC CAPITAL LETTER VE -0xC3 U+0413 #CYRILLIC CAPITAL LETTER GHE -0xC4 U+0414 #CYRILLIC CAPITAL LETTER DE -0xC5 U+0415 #CYRILLIC CAPITAL LETTER IE -0xC6 U+0416 #CYRILLIC CAPITAL LETTER ZHE -0xC7 U+0417 #CYRILLIC CAPITAL LETTER ZE -0xC8 U+0418 #CYRILLIC CAPITAL LETTER I -0xC9 U+0419 #CYRILLIC CAPITAL LETTER SHORT I -0xCA U+041A #CYRILLIC CAPITAL LETTER KA -0xCB U+041B #CYRILLIC CAPITAL LETTER EL -0xCC U+041C #CYRILLIC CAPITAL LETTER EM -0xCD U+041D #CYRILLIC CAPITAL LETTER EN -0xCE U+041E #CYRILLIC CAPITAL LETTER O -0xCF U+041F #CYRILLIC CAPITAL LETTER PE -0xD0 U+0420 #CYRILLIC CAPITAL LETTER ER -0xD1 U+0421 #CYRILLIC CAPITAL LETTER ES -0xD2 U+0422 #CYRILLIC CAPITAL LETTER TE -0xD3 U+0423 #CYRILLIC CAPITAL LETTER U -0xD4 U+0424 #CYRILLIC CAPITAL LETTER EF -0xD5 U+0425 #CYRILLIC CAPITAL LETTER HA -0xD6 U+0426 #CYRILLIC CAPITAL LETTER TSE -0xD7 U+0427 #CYRILLIC CAPITAL LETTER CHE -0xD8 U+0428 #CYRILLIC CAPITAL LETTER SHA -0xD9 U+0429 #CYRILLIC CAPITAL LETTER SHCHA -0xDA U+042A #CYRILLIC CAPITAL LETTER HARD SIGN -0xDB U+042B #CYRILLIC CAPITAL LETTER YERU -0xDC U+042C #CYRILLIC CAPITAL LETTER SOFT SIGN -0xDD U+042D #CYRILLIC CAPITAL LETTER E -0xDE U+042E #CYRILLIC CAPITAL LETTER YU -0xDF U+042F #CYRILLIC CAPITAL LETTER YA -0xE0 U+0430 #CYRILLIC SMALL LETTER A -0xE1 U+0431 #CYRILLIC SMALL LETTER BE -0xE2 U+0432 #CYRILLIC SMALL LETTER VE -0xE3 U+0433 #CYRILLIC SMALL LETTER GHE -0xE4 U+0434 #CYRILLIC SMALL LETTER DE -0xE5 U+0435 #CYRILLIC SMALL LETTER IE -0xE6 U+0436 #CYRILLIC SMALL LETTER ZHE -0xE7 U+0437 #CYRILLIC SMALL LETTER ZE -0xE8 U+0438 #CYRILLIC SMALL LETTER I -0xE9 U+0439 #CYRILLIC SMALL LETTER SHORT I -0xEA U+043A #CYRILLIC SMALL LETTER KA -0xEB U+043B #CYRILLIC SMALL LETTER EL -0xEC U+043C #CYRILLIC SMALL LETTER EM -0xED U+043D #CYRILLIC SMALL LETTER EN -0xEE U+043E #CYRILLIC SMALL LETTER O -0xEF U+043F #CYRILLIC SMALL LETTER PE -0xF0 U+0440 #CYRILLIC SMALL LETTER ER -0xF1 U+0441 #CYRILLIC SMALL LETTER ES -0xF2 U+0442 #CYRILLIC SMALL LETTER TE -0xF3 U+0443 #CYRILLIC SMALL LETTER U -0xF4 U+0444 #CYRILLIC SMALL LETTER EF -0xF5 U+0445 #CYRILLIC SMALL LETTER HA -0xF6 U+0446 #CYRILLIC SMALL LETTER TSE -0xF7 U+0447 #CYRILLIC SMALL LETTER CHE -0xF8 U+0448 #CYRILLIC SMALL LETTER SHA -0xF9 U+0449 #CYRILLIC SMALL LETTER SHCHA -0xFA U+044A #CYRILLIC SMALL LETTER HARD SIGN -0xFB U+044B #CYRILLIC SMALL LETTER YERU -0xFC U+044C #CYRILLIC SMALL LETTER SOFT SIGN -0xFD U+044D #CYRILLIC SMALL LETTER E -0xFE U+044E #CYRILLIC SMALL LETTER YU -0xFF U+044F #CYRILLIC SMALL LETTER YA +0x20-0x7f idem +# +0x80 U+0402 #CYRILLIC CAPITAL LETTER DJE +0x81 U+0403 #CYRILLIC CAPITAL LETTER GJE +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0453 #CYRILLIC SMALL LETTER GJE +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +#0x88 #UNDEFINED +0x89 U+2030 #PER MILLE SIGN +0x8A U+0409 #CYRILLIC CAPITAL LETTER LJE +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C U+040A #CYRILLIC CAPITAL LETTER NJE +0x8D U+040C #CYRILLIC CAPITAL LETTER KJE +0x8E U+040B #CYRILLIC CAPITAL LETTER TSHE +0x8F U+040F #CYRILLIC CAPITAL LETTER DZHE +0x90 U+0452 #CYRILLIC SMALL LETTER DJE +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +0x9A U+0459 #CYRILLIC SMALL LETTER LJE +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C U+045A #CYRILLIC SMALL LETTER NJE +0x9D U+045C #CYRILLIC SMALL LETTER KJE +0x9E U+045B #CYRILLIC SMALL LETTER TSHE +0x9F U+045F #CYRILLIC SMALL LETTER DZHE +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+040E #CYRILLIC CAPITAL LETTER SHORT U +0xA2 U+045E #CYRILLIC SMALL LETTER SHORT U +0xA3 U+0408 #CYRILLIC CAPITAL LETTER JE +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+0401 #CYRILLIC CAPITAL LETTER IO +0xA9 U+00A9 #COPYRIGHT SIGN +0xAA U+0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+0407 #CYRILLIC CAPITAL LETTER YI +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xB3 U+0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xB4 U+0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+0451 #CYRILLIC SMALL LETTER IO +0xB9 U+2116 #NUMERO SIGN +0xBA U+0454 #CYRILLIC SMALL LETTER UKRAINIAN IE +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+0458 #CYRILLIC SMALL LETTER JE +0xBD U+0405 #CYRILLIC CAPITAL LETTER DZE +0xBE U+0455 #CYRILLIC SMALL LETTER DZE +0xBF U+0457 #CYRILLIC SMALL LETTER YI +0xC0 U+0410 #CYRILLIC CAPITAL LETTER A +0xC1 U+0411 #CYRILLIC CAPITAL LETTER BE +0xC2 U+0412 #CYRILLIC CAPITAL LETTER VE +0xC3 U+0413 #CYRILLIC CAPITAL LETTER GHE +0xC4 U+0414 #CYRILLIC CAPITAL LETTER DE +0xC5 U+0415 #CYRILLIC CAPITAL LETTER IE +0xC6 U+0416 #CYRILLIC CAPITAL LETTER ZHE +0xC7 U+0417 #CYRILLIC CAPITAL LETTER ZE +0xC8 U+0418 #CYRILLIC CAPITAL LETTER I +0xC9 U+0419 #CYRILLIC CAPITAL LETTER SHORT I +0xCA U+041A #CYRILLIC CAPITAL LETTER KA +0xCB U+041B #CYRILLIC CAPITAL LETTER EL +0xCC U+041C #CYRILLIC CAPITAL LETTER EM +0xCD U+041D #CYRILLIC CAPITAL LETTER EN +0xCE U+041E #CYRILLIC CAPITAL LETTER O +0xCF U+041F #CYRILLIC CAPITAL LETTER PE +0xD0 U+0420 #CYRILLIC CAPITAL LETTER ER +0xD1 U+0421 #CYRILLIC CAPITAL LETTER ES +0xD2 U+0422 #CYRILLIC CAPITAL LETTER TE +0xD3 U+0423 #CYRILLIC CAPITAL LETTER U +0xD4 U+0424 #CYRILLIC CAPITAL LETTER EF +0xD5 U+0425 #CYRILLIC CAPITAL LETTER HA +0xD6 U+0426 #CYRILLIC CAPITAL LETTER TSE +0xD7 U+0427 #CYRILLIC CAPITAL LETTER CHE +0xD8 U+0428 #CYRILLIC CAPITAL LETTER SHA +0xD9 U+0429 #CYRILLIC CAPITAL LETTER SHCHA +0xDA U+042A #CYRILLIC CAPITAL LETTER HARD SIGN +0xDB U+042B #CYRILLIC CAPITAL LETTER YERU +0xDC U+042C #CYRILLIC CAPITAL LETTER SOFT SIGN +0xDD U+042D #CYRILLIC CAPITAL LETTER E +0xDE U+042E #CYRILLIC CAPITAL LETTER YU +0xDF U+042F #CYRILLIC CAPITAL LETTER YA +0xE0 U+0430 #CYRILLIC SMALL LETTER A +0xE1 U+0431 #CYRILLIC SMALL LETTER BE +0xE2 U+0432 #CYRILLIC SMALL LETTER VE +0xE3 U+0433 #CYRILLIC SMALL LETTER GHE +0xE4 U+0434 #CYRILLIC SMALL LETTER DE +0xE5 U+0435 #CYRILLIC SMALL LETTER IE +0xE6 U+0436 #CYRILLIC SMALL LETTER ZHE +0xE7 U+0437 #CYRILLIC SMALL LETTER ZE +0xE8 U+0438 #CYRILLIC SMALL LETTER I +0xE9 U+0439 #CYRILLIC SMALL LETTER SHORT I +0xEA U+043A #CYRILLIC SMALL LETTER KA +0xEB U+043B #CYRILLIC SMALL LETTER EL +0xEC U+043C #CYRILLIC SMALL LETTER EM +0xED U+043D #CYRILLIC SMALL LETTER EN +0xEE U+043E #CYRILLIC SMALL LETTER O +0xEF U+043F #CYRILLIC SMALL LETTER PE +0xF0 U+0440 #CYRILLIC SMALL LETTER ER +0xF1 U+0441 #CYRILLIC SMALL LETTER ES +0xF2 U+0442 #CYRILLIC SMALL LETTER TE +0xF3 U+0443 #CYRILLIC SMALL LETTER U +0xF4 U+0444 #CYRILLIC SMALL LETTER EF +0xF5 U+0445 #CYRILLIC SMALL LETTER HA +0xF6 U+0446 #CYRILLIC SMALL LETTER TSE +0xF7 U+0447 #CYRILLIC SMALL LETTER CHE +0xF8 U+0448 #CYRILLIC SMALL LETTER SHA +0xF9 U+0449 #CYRILLIC SMALL LETTER SHCHA +0xFA U+044A #CYRILLIC SMALL LETTER HARD SIGN +0xFB U+044B #CYRILLIC SMALL LETTER YERU +0xFC U+044C #CYRILLIC SMALL LETTER SOFT SIGN +0xFD U+044D #CYRILLIC SMALL LETTER E +0xFE U+044E #CYRILLIC SMALL LETTER YU +0xFF U+044F #CYRILLIC SMALL LETTER YA diff --git a/src/chrtrans/cp1252_uni.tbl b/src/chrtrans/cp1252_uni.tbl index 09ee9b6a..d8adb2fa 100644 --- a/src/chrtrans/cp1252_uni.tbl +++ b/src/chrtrans/cp1252_uni.tbl @@ -6,155 +6,280 @@ D0 Mwindows-1252 #Name as a Display Charset (used on Options screen) -OWestern (windows-1252) +OWinLatin1 (cp1252) # -# Name: cp1252 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 +# Name: cp1252_WinLatin1 to Unicode table +# Unicode version: 1.1 +# Table version: 1.1 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 03/31/95 +# Authors: Michel Suignard <michelsu@microsoft.com> +# Lori Hoerth <lorih@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1252 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #1 is the cp1252_WinLatin1 code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1252 order +# The entries are in cp1252_WinLatin1 order # -################## +#0x00 U+0000 #Null +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0a U+000a #LINE FEED +#0x0b U+000b #VERTICAL TABULATION +#0x0c U+000c #FORM FEED +#0x0d U+000d #CARRIAGE RETURN +#0x0e U+000e #SHIFT OUT +#0x0f U+000f #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1a U+001a #SUBSTITUTE +#0x1b U+001b #ESCAPE +#0x1c U+001c #FILE SEPARATOR +#0x1d U+001d #GROUP SEPARATOR +#0x1e U+001e #RECORD SEPARATOR +#0x1f U+001f #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2a U+002a #ASTERISK +0x2b U+002b #PLUS SIGN +0x2c U+002c #COMMA +0x2d U+002d #HYPHEN-MINUS +0x2e U+002e #FULL STOP +0x2f U+002f #SOLIDUS +0x30 U+0030 #DIGIT ZERO +0x31 U+0031 #DIGIT ONE +0x32 U+0032 #DIGIT TWO +0x33 U+0033 #DIGIT THREE +0x34 U+0034 #DIGIT FOUR +0x35 U+0035 #DIGIT FIVE +0x36 U+0036 #DIGIT SIX +0x37 U+0037 #DIGIT SEVEN +0x38 U+0038 #DIGIT EIGHT +0x39 U+0039 #DIGIT NINE +0x3a U+003a #COLON +0x3b U+003b #SEMICOLON +0x3c U+003c #LESS-THAN SIGN +0x3d U+003d #EQUALS SIGN +0x3e U+003e #GREATER-THAN SIGN +0x3f U+003f #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL LETTER A +0x42 U+0042 #LATIN CAPITAL LETTER B +0x43 U+0043 #LATIN CAPITAL LETTER C +0x44 U+0044 #LATIN CAPITAL LETTER D +0x45 U+0045 #LATIN CAPITAL LETTER E +0x46 U+0046 #LATIN CAPITAL LETTER F +0x47 U+0047 #LATIN CAPITAL LETTER G +0x48 U+0048 #LATIN CAPITAL LETTER H +0x49 U+0049 #LATIN CAPITAL LETTER I +0x4a U+004a #LATIN CAPITAL LETTER J +0x4b U+004b #LATIN CAPITAL LETTER K +0x4c U+004c #LATIN CAPITAL LETTER L +0x4d U+004d #LATIN CAPITAL LETTER M +0x4e U+004e #LATIN CAPITAL LETTER N +0x4f U+004f #LATIN CAPITAL LETTER O +0x50 U+0050 #LATIN CAPITAL LETTER P +0x51 U+0051 #LATIN CAPITAL LETTER Q +0x52 U+0052 #LATIN CAPITAL LETTER R +0x53 U+0053 #LATIN CAPITAL LETTER S +0x54 U+0054 #LATIN CAPITAL LETTER T +0x55 U+0055 #LATIN CAPITAL LETTER U +0x56 U+0056 #LATIN CAPITAL LETTER V +0x57 U+0057 #LATIN CAPITAL LETTER W +0x58 U+0058 #LATIN CAPITAL LETTER X +0x59 U+0059 #LATIN CAPITAL LETTER Y +0x5a U+005a #LATIN CAPITAL LETTER Z +0x5b U+005b #LEFT SQUARE BRACKET +0x5c U+005c #REVERSE SOLIDUS +0x5d U+005d #RIGHT SQUARE BRACKET +0x5e U+005e #CIRCUMFLEX ACCENT +0x5f U+005f #LOW LINE +0x60 U+0060 #GRAVE ACCENT +0x61 U+0061 #LATIN SMALL LETTER A +0x62 U+0062 #LATIN SMALL LETTER B +0x63 U+0063 #LATIN SMALL LETTER C +0x64 U+0064 #LATIN SMALL LETTER D +0x65 U+0065 #LATIN SMALL LETTER E +0x66 U+0066 #LATIN SMALL LETTER F +0x67 U+0067 #LATIN SMALL LETTER G +0x68 U+0068 #LATIN SMALL LETTER H +0x69 U+0069 #LATIN SMALL LETTER I +0x6a U+006a #LATIN SMALL LETTER J +0x6b U+006b #LATIN SMALL LETTER K +0x6c U+006c #LATIN SMALL LETTER L +0x6d U+006d #LATIN SMALL LETTER M +0x6e U+006e #LATIN SMALL LETTER N +0x6f U+006f #LATIN SMALL LETTER O +0x70 U+0070 #LATIN SMALL LETTER P +0x71 U+0071 #LATIN SMALL LETTER Q +0x72 U+0072 #LATIN SMALL LETTER R +0x73 U+0073 #LATIN SMALL LETTER S +0x74 U+0074 #LATIN SMALL LETTER T +0x75 U+0075 #LATIN SMALL LETTER U +0x76 U+0076 #LATIN SMALL LETTER V +0x77 U+0077 #LATIN SMALL LETTER W +0x78 U+0078 #LATIN SMALL LETTER X +0x79 U+0079 #LATIN SMALL LETTER Y +0x7a U+007a #LATIN SMALL LETTER Z +0x7b U+007b #LEFT CURLY BRACKET +0x7c U+007c #VERTICAL LINE +0x7d U+007d #RIGHT CURLY BRACKET +0x7e U+007e #TILDE +0x7f U+007f #DELETE +#0x80 U+0080 #NOT USED +#0x81 U+0081 #NOT USED +0x82 U+201a #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201e #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02c6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 U+2030 #PER MILLE SIGN +0x8a U+0160 #LATIN CAPITAL LETTER S WITH CARON +0x8b U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8c U+0152 #LATIN CAPITAL LIGATURE OE +#0x8d U+008d #NOT USED +#0x8e U+008e #NOT USED +#0x8f U+008f #NOT USED +#0x90 U+0090 #NOT USED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201c #LEFT DOUBLE QUOTATION MARK +0x94 U+201d #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 U+02dc #SMALL TILDE +0x99 U+2122 #TRADE MARK SIGN +0x9a U+0161 #LATIN SMALL LETTER S WITH CARON +0x9b U+203a #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9c U+0153 #LATIN SMALL LIGATURE OE +#0x9d U+009d #NOT USED +#0x9e U+009e #NOT USED +0x9f U+0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS +0xa0 U+00a0 #NO-BREAK SPACE +0xa1 U+00a1 #INVERTED EXCLAMATION MARK +0xa2 U+00a2 #CENT SIGN +0xa3 U+00a3 #POUND SIGN +0xa4 U+00a4 #CURRENCY SIGN +0xa5 U+00a5 #YEN SIGN +0xa6 U+00a6 #BROKEN BAR +0xa7 U+00a7 #SECTION SIGN +0xa8 U+00a8 #DIAERESIS +0xa9 U+00a9 #COPYRIGHT SIGN +0xaa U+00aa #FEMININE ORDINAL INDICATOR +0xab U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xac U+00ac #NOT SIGN +0xad U+00ad #SOFT HYPHEN +0xae U+00ae #REGISTERED SIGN +0xaf U+00af #MACRON +0xb0 U+00b0 #DEGREE SIGN +0xb1 U+00b1 #PLUS-MINUS SIGN +0xb2 U+00b2 #SUPERSCRIPT TWO +0xb3 U+00b3 #SUPERSCRIPT THREE +0xb4 U+00b4 #ACUTE ACCENT +0xb5 U+00b5 #MICRO SIGN +0xb6 U+00b6 #PILCROW SIGN +0xb7 U+00b7 #MIDDLE DOT +0xb8 U+00b8 #CEDILLA +0xb9 U+00b9 #SUPERSCRIPT ONE +0xba U+00ba #MASCULINE ORDINAL INDICATOR +0xbb U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xbc U+00bc #VULGAR FRACTION ONE QUARTER +0xbd U+00bd #VULGAR FRACTION ONE HALF +0xbe U+00be #VULGAR FRACTION THREE QUARTERS +0xbf U+00bf #INVERTED QUESTION MARK +0xc0 U+00c0 #LATIN CAPITAL LETTER A WITH GRAVE +0xc1 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xc2 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xc3 U+00c3 #LATIN CAPITAL LETTER A WITH TILDE +0xc4 U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xc5 U+00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0xc6 U+00c6 #LATIN CAPITAL LIGATURE AE +0xc7 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xc8 U+00c8 #LATIN CAPITAL LETTER E WITH GRAVE +0xc9 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0xca U+00ca #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xcb U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS +0xcc U+00cc #LATIN CAPITAL LETTER I WITH GRAVE +0xcd U+00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xce U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xcf U+00cf #LATIN CAPITAL LETTER I WITH DIAERESIS +0xd0 U+00d0 #LATIN CAPITAL LETTER ETH +0xd1 U+00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xd2 U+00d2 #LATIN CAPITAL LETTER O WITH GRAVE +0xd3 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xd4 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xd5 U+00d5 #LATIN CAPITAL LETTER O WITH TILDE +0xd6 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xd7 U+00d7 #MULTIPLICATION SIGN +0xd8 U+00d8 #LATIN CAPITAL LETTER O WITH STROKE +0xd9 U+00d9 #LATIN CAPITAL LETTER U WITH GRAVE +0xda U+00da #LATIN CAPITAL LETTER U WITH ACUTE +0xdb U+00db #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xdc U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0xdd U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE +0xde U+00de #LATIN CAPITAL LETTER THORN +0xdf U+00df #LATIN SMALL LETTER SHARP S +0xe0 U+00e0 #LATIN SMALL LETTER A WITH GRAVE +0xe1 U+00e1 #LATIN SMALL LETTER A WITH ACUTE +0xe2 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xe3 U+00e3 #LATIN SMALL LETTER A WITH TILDE +0xe4 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0xe5 U+00e5 #LATIN SMALL LETTER A WITH RING ABOVE +0xe6 U+00e6 #LATIN SMALL LIGATURE AE +0xe7 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA +0xe8 U+00e8 #LATIN SMALL LETTER E WITH GRAVE +0xe9 U+00e9 #LATIN SMALL LETTER E WITH ACUTE +0xea U+00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX +0xeb U+00eb #LATIN SMALL LETTER E WITH DIAERESIS +0xec U+00ec #LATIN SMALL LETTER I WITH GRAVE +0xed U+00ed #LATIN SMALL LETTER I WITH ACUTE +0xee U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xef U+00ef #LATIN SMALL LETTER I WITH DIAERESIS +0xf0 U+00f0 #LATIN SMALL LETTER ETH +0xf1 U+00f1 #LATIN SMALL LETTER N WITH TILDE +0xf2 U+00f2 #LATIN SMALL LETTER O WITH GRAVE +0xf3 U+00f3 #LATIN SMALL LETTER O WITH ACUTE +0xf4 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xf5 U+00f5 #LATIN SMALL LETTER O WITH TILDE +0xf6 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0xf7 U+00f7 #DIVISION SIGN +0xf8 U+00f8 #LATIN SMALL LETTER O WITH STROKE +0xf9 U+00f9 #LATIN SMALL LETTER U WITH GRAVE +0xfa U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xfb U+00fb #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xfc U+00fc #LATIN SMALL LETTER U WITH DIAERESIS +0xfd U+00fd #LATIN SMALL LETTER Y WITH ACUTE +0xfe U+00fe #LATIN SMALL LETTER THORN +0xff U+00ff #LATIN SMALL LETTER Y WITH DIAERESIS + -0x20-0x7e idem -# -0x80 U+20AC #EURO SIGN -0x81 #UNDEFINED -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK -0x85 U+2026 #HORIZONTAL ELLIPSIS -0x86 U+2020 #DAGGER -0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT -0x89 U+2030 #PER MILLE SIGN -0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+0152 #LATIN CAPITAL LIGATURE OE -0x8D #UNDEFINED -0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON -0x8F #UNDEFINED -0x90 #UNDEFINED -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK -0x95 U+2022 #BULLET -0x96 U+2013 #EN DASH -0x97 U+2014 #EM DASH -0x98 U+02DC #SMALL TILDE -0x99 U+2122 #TRADE MARK SIGN -0x9A U+0161 #LATIN SMALL LETTER S WITH CARON -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+0153 #LATIN SMALL LIGATURE OE -0x9D #UNDEFINED -0x9E U+017E #LATIN SMALL LETTER Z WITH CARON -0x9F U+0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+00A1 #INVERTED EXCLAMATION MARK -0xA2 U+00A2 #CENT SIGN -0xA3 U+00A3 #POUND SIGN -0xA4 U+00A4 #CURRENCY SIGN -0xA5 U+00A5 #YEN SIGN -0xA6 U+00A6 #BROKEN BAR -0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS -0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+00AA #FEMININE ORDINAL INDICATOR -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xAC U+00AC #NOT SIGN -0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+00AF #MACRON -0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT TWO -0xB3 U+00B3 #SUPERSCRIPT THREE -0xB4 U+00B4 #ACUTE ACCENT -0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA -0xB9 U+00B9 #SUPERSCRIPT ONE -0xBA U+00BA #MASCULINE ORDINAL INDICATOR -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+00BC #VULGAR FRACTION ONE QUARTER -0xBD U+00BD #VULGAR FRACTION ONE HALF -0xBE U+00BE #VULGAR FRACTION THREE QUARTERS -0xBF U+00BF #INVERTED QUESTION MARK -0xC0 U+00C0 #LATIN CAPITAL LETTER A WITH GRAVE -0xC1 U+00C1 #LATIN CAPITAL LETTER A WITH ACUTE -0xC2 U+00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xC3 U+00C3 #LATIN CAPITAL LETTER A WITH TILDE -0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0xC5 U+00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE -0xC6 U+00C6 #LATIN CAPITAL LETTER AE -0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA -0xC8 U+00C8 #LATIN CAPITAL LETTER E WITH GRAVE -0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE -0xCA U+00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX -0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS -0xCC U+00CC #LATIN CAPITAL LETTER I WITH GRAVE -0xCD U+00CD #LATIN CAPITAL LETTER I WITH ACUTE -0xCE U+00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xCF U+00CF #LATIN CAPITAL LETTER I WITH DIAERESIS -0xD0 U+00D0 #LATIN CAPITAL LETTER ETH -0xD1 U+00D1 #LATIN CAPITAL LETTER N WITH TILDE -0xD2 U+00D2 #LATIN CAPITAL LETTER O WITH GRAVE -0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE -0xD4 U+00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xD5 U+00D5 #LATIN CAPITAL LETTER O WITH TILDE -0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0xD7 U+00D7 #MULTIPLICATION SIGN -0xD8 U+00D8 #LATIN CAPITAL LETTER O WITH STROKE -0xD9 U+00D9 #LATIN CAPITAL LETTER U WITH GRAVE -0xDA U+00DA #LATIN CAPITAL LETTER U WITH ACUTE -0xDB U+00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX -0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS -0xDD U+00DD #LATIN CAPITAL LETTER Y WITH ACUTE -0xDE U+00DE #LATIN CAPITAL LETTER THORN -0xDF U+00DF #LATIN SMALL LETTER SHARP S -0xE0 U+00E0 #LATIN SMALL LETTER A WITH GRAVE -0xE1 U+00E1 #LATIN SMALL LETTER A WITH ACUTE -0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX -0xE3 U+00E3 #LATIN SMALL LETTER A WITH TILDE -0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS -0xE5 U+00E5 #LATIN SMALL LETTER A WITH RING ABOVE -0xE6 U+00E6 #LATIN SMALL LETTER AE -0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+00E8 #LATIN SMALL LETTER E WITH GRAVE -0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE -0xEA U+00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX -0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS -0xEC U+00EC #LATIN SMALL LETTER I WITH GRAVE -0xED U+00ED #LATIN SMALL LETTER I WITH ACUTE -0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX -0xEF U+00EF #LATIN SMALL LETTER I WITH DIAERESIS -0xF0 U+00F0 #LATIN SMALL LETTER ETH -0xF1 U+00F1 #LATIN SMALL LETTER N WITH TILDE -0xF2 U+00F2 #LATIN SMALL LETTER O WITH GRAVE -0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE -0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0xF5 U+00F5 #LATIN SMALL LETTER O WITH TILDE -0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS -0xF7 U+00F7 #DIVISION SIGN -0xF8 U+00F8 #LATIN SMALL LETTER O WITH STROKE -0xF9 U+00F9 #LATIN SMALL LETTER U WITH GRAVE -0xFA U+00FA #LATIN SMALL LETTER U WITH ACUTE -0xFB U+00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX -0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS -0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE -0xFE U+00FE #LATIN SMALL LETTER THORN -0xFF U+00FF #LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/src/chrtrans/cp1253_uni.tbl b/src/chrtrans/cp1253_uni.tbl index b53e44a2..097dae2b 100644 --- a/src/chrtrans/cp1253_uni.tbl +++ b/src/chrtrans/cp1253_uni.tbl @@ -2,47 +2,45 @@ Mwindows-1253 #Name as a Display Charset (used on Options screen) -OGreek (windows-1253) +OWinGreek (cp1253) -# -# Name: cp1253 to Unicode table +# Name: cp1253_WinGreek to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1253 code (in hex) +# Column #1 is the cp1253_WinGreek code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1253 order +# The entries are in cp1253_WinGreek order # ################## -0x20-0x7e idem +0x20-0x7f idem # -0x80 U+20AC #EURO SIGN -0x81 #UNDEFINED +#0x80 #UNDEFINED +#0x81 #UNDEFINED 0x82 U+201A #SINGLE LOW-9 QUOTATION MARK 0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK 0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 #UNDEFINED +#0x88 #UNDEFINED 0x89 U+2030 #PER MILLE SIGN -0x8A #UNDEFINED +#0x8A #UNDEFINED 0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C #UNDEFINED -0x8D #UNDEFINED -0x8E #UNDEFINED -0x8F #UNDEFINED -0x90 #UNDEFINED +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED 0x91 U+2018 #LEFT SINGLE QUOTATION MARK 0x92 U+2019 #RIGHT SINGLE QUOTATION MARK 0x93 U+201C #LEFT DOUBLE QUOTATION MARK @@ -50,14 +48,14 @@ OGreek (windows-1253) 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -0x98 #UNDEFINED +#0x98 #UNDEFINED 0x99 U+2122 #TRADE MARK SIGN -0x9A #UNDEFINED +#0x9A #UNDEFINED 0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C #UNDEFINED -0x9D #UNDEFINED -0x9E #UNDEFINED -0x9F #UNDEFINED +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED 0xA0 U+00A0 #NO-BREAK SPACE 0xA1 U+0385 #GREEK DIALYTIKA TONOS 0xA2 U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS @@ -68,7 +66,7 @@ OGreek (windows-1253) 0xA7 U+00A7 #SECTION SIGN 0xA8 U+00A8 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -0xAA #UNDEFINED +#0xAA #UNDEFINED 0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN @@ -108,7 +106,7 @@ OGreek (windows-1253) 0xCF U+039F #GREEK CAPITAL LETTER OMICRON 0xD0 U+03A0 #GREEK CAPITAL LETTER PI 0xD1 U+03A1 #GREEK CAPITAL LETTER RHO -0xD2 #UNDEFINED +#0xD2 #UNDEFINED 0xD3 U+03A3 #GREEK CAPITAL LETTER SIGMA 0xD4 U+03A4 #GREEK CAPITAL LETTER TAU 0xD5 U+03A5 #GREEK CAPITAL LETTER UPSILON @@ -153,4 +151,4 @@ OGreek (windows-1253) 0xFC U+03CC #GREEK SMALL LETTER OMICRON WITH TONOS 0xFD U+03CD #GREEK SMALL LETTER UPSILON WITH TONOS 0xFE U+03CE #GREEK SMALL LETTER OMEGA WITH TONOS -0xFF #UNDEFINED +#0xFF #UNDEFINED diff --git a/src/chrtrans/cp1255_uni.tbl b/src/chrtrans/cp1255_uni.tbl index 6c4ca7d0..5f4d761a 100644 --- a/src/chrtrans/cp1255_uni.tbl +++ b/src/chrtrans/cp1255_uni.tbl @@ -2,94 +2,92 @@ Mwindows-1255 #Name as a Display Charset (used on Options screen). -OHebrew (windows-1255) +OWinHebrew (cp1255) -# -# Name: cp1255 to Unicode table +# Name: cp1255_WinHebrew to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1255 code (in hex) +# Column #1 is the cp1255_WinHebrew code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1255 order +# The entries are in cp1255_WinHebrew order # ################## -0x20-0x7e idem +0x20-0x7f idem # -0x80 U+20AC #EURO SIGN -0x81 #UNDEFINED -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX 0x89 U+2030 #PER MILLE SIGN -0x8A #UNDEFINED -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C #UNDEFINED -0x8D #UNDEFINED -0x8E #UNDEFINED -0x8F #UNDEFINED -0x90 #UNDEFINED -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -0x98 U+02DC #SMALL TILDE -0x99 U+2122 #TRADE MARK SIGN -0x9A #UNDEFINED -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C #UNDEFINED -0x9D #UNDEFINED -0x9E #UNDEFINED -0x9F #UNDEFINED -0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+00A1 #INVERTED EXCLAMATION MARK +0x98 U+02DC #SPACING TILDE +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE +#0xA1 #UNDEFINED 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+20AA #NEW SHEQEL SIGN 0xA5 U+00A5 #YEN SIGN -0xA6 U+00A6 #BROKEN BAR +0xA6 U+00A6 #BROKEN VERTICAL BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS +0xA8 U+00A8 #SPACING DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+00D7 #MULTIPLICATION SIGN -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+00AF #MACRON +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING MACRON 0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT TWO -0xB3 U+00B3 #SUPERSCRIPT THREE -0xB4 U+00B4 #ACUTE ACCENT +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE 0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN +0xB6 U+00B6 #PARAGRAPH SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA -0xB9 U+00B9 #SUPERSCRIPT ONE -0xBA U+00F7 #DIVISION SIGN -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+00BC #VULGAR FRACTION ONE QUARTER -0xBD U+00BD #VULGAR FRACTION ONE HALF -0xBE U+00BE #VULGAR FRACTION THREE QUARTERS -0xBF U+00BF #INVERTED QUESTION MARK +#0xB8 #UNDEFINED +0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE +#0xBA #UNDEFINED +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS +#0xBF #UNDEFINED 0xC0 U+05B0 #HEBREW POINT SHEVA 0xC1 U+05B1 #HEBREW POINT HATAF SEGOL 0xC2 U+05B2 #HEBREW POINT HATAF PATAH @@ -100,28 +98,28 @@ OHebrew (windows-1255) 0xC7 U+05B7 #HEBREW POINT PATAH 0xC8 U+05B8 #HEBREW POINT QAMATS 0xC9 U+05B9 #HEBREW POINT HOLAM -0xCA #UNDEFINED +0xCA U+05BA #HEBREW POINT 0xCB U+05BB #HEBREW POINT QUBUTS -0xCC U+05BC #HEBREW POINT DAGESH OR MAPIQ +0xCC U+05BC #HEBREW POINT DAGESH 0xCD U+05BD #HEBREW POINT METEG 0xCE U+05BE #HEBREW PUNCTUATION MAQAF 0xCF U+05BF #HEBREW POINT RAFE -0xD0 U+05C0 #HEBREW PUNCTUATION PASEQ +0xD0 U+05C0 #HEBREW POINT PASEQ 0xD1 U+05C1 #HEBREW POINT SHIN DOT 0xD2 U+05C2 #HEBREW POINT SIN DOT 0xD3 U+05C3 #HEBREW PUNCTUATION SOF PASUQ -0xD4 U+05F0 #HEBREW LIGATURE YIDDISH DOUBLE VAV -0xD5 U+05F1 #HEBREW LIGATURE YIDDISH VAV YOD -0xD6 U+05F2 #HEBREW LIGATURE YIDDISH DOUBLE YOD -0xD7 U+05F3 #HEBREW PUNCTUATION GERESH -0xD8 U+05F4 #HEBREW PUNCTUATION GERSHAYIM -0xD9 #UNDEFINED -0xDA #UNDEFINED -0xDB #UNDEFINED -0xDC #UNDEFINED -0xDD #UNDEFINED -0xDE #UNDEFINED -0xDF #UNDEFINED +0xD4 U+05F0 #HEBREW LETTER DOUBLE VAV +0xD5 U+05F1 #HEBREW LETTER VAV YOD +0xD6 U+05F2 #HEBREW LETTER DOUBLE YOD +#0xD7 #UNDEFINED +#0xD8 #UNDEFINED +#0xD9 #UNDEFINED +#0xDA #UNDEFINED +#0xDB #UNDEFINED +#0xDC #UNDEFINED +#0xDD #UNDEFINED +#0xDE #UNDEFINED +#0xDF #UNDEFINED 0xE0 U+05D0 #HEBREW LETTER ALEF 0xE1 U+05D1 #HEBREW LETTER BET 0xE2 U+05D2 #HEBREW LETTER GIMEL @@ -149,8 +147,8 @@ OHebrew (windows-1255) 0xF8 U+05E8 #HEBREW LETTER RESH 0xF9 U+05E9 #HEBREW LETTER SHIN 0xFA U+05EA #HEBREW LETTER TAV -0xFB #UNDEFINED -0xFC #UNDEFINED +#0xFB #UNDEFINED +#0xFC #UNDEFINED 0xFD U+200E #LEFT-TO-RIGHT MARK 0xFE U+200F #RIGHT-TO-LEFT MARK -0xFF #UNDEFINED +#0xFF #UNDEFINED diff --git a/src/chrtrans/cp1256_uni.tbl b/src/chrtrans/cp1256_uni.tbl index 23af4614..6d85d221 100644 --- a/src/chrtrans/cp1256_uni.tbl +++ b/src/chrtrans/cp1256_uni.tbl @@ -2,112 +2,110 @@ Mwindows-1256 #Name as a Display Charset (used on Options screen). -OArabic (windows-1256) +OWinArabic (cp1256) -# -# Name: cp1256 to Unicode table +# Name: cp1256_WinArabic to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1256 code (in hex) +# Column #1 is the cp1256_WinArabic code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1256 order +# The entries are in cp1256_WinArabic order # ################## -0x20-0x7e idem +0x20-0x7f idem # -0x80 U+20AC #EURO SIGN -0x81 U+067E #ARABIC LETTER PEH -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +#0x80 #UNDEFINED +0x81 U+067E #ARABIC TAA WITH THREE DOTS BELOW +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX 0x89 U+2030 #PER MILLE SIGN -0x8A #UNDEFINED -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C U+0152 #LATIN CAPITAL LIGATURE OE -0x8D U+0686 #ARABIC LETTER TCHEH -0x8E U+0698 #ARABIC LETTER JEH -0x8F #UNDEFINED -0x90 U+06AF #ARABIC LETTER GAF -0x91 U+2018 #LEFT SINGLE QUOTATION MARK -0x92 U+2019 #RIGHT SINGLE QUOTATION MARK -0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +0x8C U+0152 #LATIN CAPITAL LETTER O E +0x8D U+0686 #ARABIC HAA WITH MIDDLE THREE DOTS DOWNWARD +0x8E U+0698 #ARABIC RA WITH THREE DOTS ABOVE +#0x8F #UNDEFINED +0x90 U+06AF #ARABIC GAF +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -0x98 #UNDEFINED -0x99 U+2122 #TRADE MARK SIGN -0x9A #UNDEFINED -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C U+0153 #LATIN SMALL LIGATURE OE +#0x98 #UNDEFINED +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET +0x9C U+0153 #LATIN SMALL LETTER O E 0x9D U+200C #ZERO WIDTH NON-JOINER 0x9E U+200D #ZERO WIDTH JOINER -0x9F #UNDEFINED -0xA0 U+00A0 #NO-BREAK SPACE +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE 0xA1 U+060C #ARABIC COMMA 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+00A4 #CURRENCY SIGN 0xA5 U+00A5 #YEN SIGN -0xA6 U+00A6 #BROKEN BAR +0xA6 U+00A6 #BROKEN VERTICAL BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS +0xA8 U+00A8 #SPACING DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN -0xAA #UNDEFINED -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN -0xAE U+00AE #REGISTERED SIGN -0xAF U+00AF #MACRON +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING MACRON 0xB0 U+00B0 #DEGREE SIGN -0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT TWO -0xB3 U+00B3 #SUPERSCRIPT THREE -0xB4 U+00B4 #ACUTE ACCENT +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE 0xB5 U+00B5 #MICRO SIGN -0xB6 U+00B6 #PILCROW SIGN +0xB6 U+00B6 #PARAGRAPH SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA -0xB9 U+00B9 #SUPERSCRIPT ONE +0xB8 U+00B8 #SPACING CEDILLA +0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE 0xBA U+061B #ARABIC SEMICOLON -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+00BC #VULGAR FRACTION ONE QUARTER -0xBD U+00BD #VULGAR FRACTION ONE HALF -0xBE U+00BE #VULGAR FRACTION THREE QUARTERS +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS 0xBF U+061F #ARABIC QUESTION MARK -0xC0 #UNDEFINED -0xC1 U+0621 #ARABIC LETTER HAMZA -0xC2 U+0622 #ARABIC LETTER ALEF WITH MADDA ABOVE -0xC3 U+0623 #ARABIC LETTER ALEF WITH HAMZA ABOVE -0xC4 U+0624 #ARABIC LETTER WAW WITH HAMZA ABOVE -0xC5 U+0625 #ARABIC LETTER ALEF WITH HAMZA BELOW -0xC6 U+0626 #ARABIC LETTER YEH WITH HAMZA ABOVE +#0xC0 #UNDEFINED +0xC1 U+0621 #ARABIC LETTER HAMZAH +0xC2 U+0622 #ARABIC LETTER MADDAH ON ALEF +0xC3 U+0623 #ARABIC LETTER HAMZAH ON ALEF +0xC4 U+0624 #ARABIC LETTER HAMZAH ON WAW +0xC5 U+0625 #ARABIC LETTER HAMZAH UNDER ALEF +0xC6 U+0626 #ARABIC LETTER HAMZAH ON YA 0xC7 U+0627 #ARABIC LETTER ALEF -0xC8 U+0628 #ARABIC LETTER BEH -0xC9 U+0629 #ARABIC LETTER TEH MARBUTA -0xCA U+062A #ARABIC LETTER TEH -0xCB U+062B #ARABIC LETTER THEH +0xC8 U+0628 #ARABIC LETTER BAA +0xC9 U+0629 #ARABIC LETTER TAA MARBUTAH +0xCA U+062A #ARABIC LETTER TAA +0xCB U+062B #ARABIC LETTER THAA 0xCC U+062C #ARABIC LETTER JEEM -0xCD U+062D #ARABIC LETTER HAH -0xCE U+062E #ARABIC LETTER KHAH +0xCD U+062D #ARABIC LETTER HAA +0xCE U+062E #ARABIC LETTER KHAA 0xCF U+062F #ARABIC LETTER DAL 0xD0 U+0630 #ARABIC LETTER THAL -0xD1 U+0631 #ARABIC LETTER REH +0xD1 U+0631 #ARABIC LETTER RA 0xD2 U+0632 #ARABIC LETTER ZAIN 0xD3 U+0633 #ARABIC LETTER SEEN 0xD4 U+0634 #ARABIC LETTER SHEEN @@ -115,42 +113,42 @@ OArabic (windows-1256) 0xD6 U+0636 #ARABIC LETTER DAD 0xD7 U+00D7 #MULTIPLICATION SIGN 0xD8 U+0637 #ARABIC LETTER TAH -0xD9 U+0638 #ARABIC LETTER ZAH +0xD9 U+0638 #ARABIC LETTER DHAH 0xDA U+0639 #ARABIC LETTER AIN 0xDB U+063A #ARABIC LETTER GHAIN 0xDC U+0640 #ARABIC TATWEEL -0xDD U+0641 #ARABIC LETTER FEH +0xDD U+0641 #ARABIC LETTER FA 0xDE U+0642 #ARABIC LETTER QAF -0xDF U+0643 #ARABIC LETTER KAF -0xE0 U+00E0 #LATIN SMALL LETTER A WITH GRAVE +0xDF U+0643 #ARABIC LETTER CAF +0xE0 U+00E0 #LATIN SMALL LETTER A GRAVE 0xE1 U+0644 #ARABIC LETTER LAM -0xE2 U+00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE2 U+00E2 #LATIN SMALL LETTER A CIRCUMFLEX 0xE3 U+0645 #ARABIC LETTER MEEM 0xE4 U+0646 #ARABIC LETTER NOON -0xE5 U+0647 #ARABIC LETTER HEH +0xE5 U+0647 #ARABIC LETTER HA 0xE6 U+0648 #ARABIC LETTER WAW -0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+00E8 #LATIN SMALL LETTER E WITH GRAVE -0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE -0xEA U+00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX -0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS -0xEC U+0649 #ARABIC LETTER ALEF MAKSURA -0xED U+064A #ARABIC LETTER YEH -0xEE U+00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX -0xEF U+00EF #LATIN SMALL LETTER I WITH DIAERESIS +0xE7 U+00E7 #LATIN SMALL LETTER C CEDILLA +0xE8 U+00E8 #LATIN SMALL LETTER E GRAVE +0xE9 U+00E9 #LATIN SMALL LETTER E ACUTE +0xEA U+00EA #LATIN SMALL LETTER E CIRCUMFLEX +0xEB U+00EB #LATIN SMALL LETTER E DIAERESIS +0xEC U+0649 #ARABIC LETTER ALEF MAQSURAH +0xED U+064A #ARABIC LETTER YA +0xEE U+00EE #LATIN SMALL LETTER I CIRCUMFLEX +0xEF U+00EF #LATIN SMALL LETTER I DIAERESIS 0xF0 U+064B #ARABIC FATHATAN 0xF1 U+064C #ARABIC DAMMATAN 0xF2 U+064D #ARABIC KASRATAN -0xF3 U+064E #ARABIC FATHA -0xF4 U+00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0xF5 U+064F #ARABIC DAMMA -0xF6 U+0650 #ARABIC KASRA +0xF3 U+064E #ARABIC FATHAH +0xF4 U+00F4 #LATIN SMALL LETTER O CIRCUMFLEX +0xF5 U+064F #ARABIC DAMMAH +0xF6 U+0650 #ARABIC KASRAH 0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0651 #ARABIC SHADDA -0xF9 U+00F9 #LATIN SMALL LETTER U WITH GRAVE +0xF8 U+0651 #ARABIC SHADDAH +0xF9 U+00F9 #LATIN SMALL LETTER U GRAVE 0xFA U+0652 #ARABIC SUKUN -0xFB U+00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX -0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFB U+00FB #LATIN SMALL LETTER U CIRCUMFLEX +0xFC U+00FC #LATIN SMALL LETTER U DIAERESIS 0xFD U+200E #LEFT-TO-RIGHT MARK 0xFE U+200F #RIGHT-TO-LEFT MARK -0xFF #UNDEFINED +#0xFF #UNDEFINED diff --git a/src/chrtrans/cp1257_uni.tbl b/src/chrtrans/cp1257_uni.tbl index 4c1e70e6..794df717 100644 --- a/src/chrtrans/cp1257_uni.tbl +++ b/src/chrtrans/cp1257_uni.tbl @@ -2,47 +2,169 @@ Mwindows-1257 #Name as a Display Charset (used on Options screen) -OBaltic Rim (windows-1257) +OWinBaltRim (cp1257) -# -# Name: cp1257 to Unicode table +# Name: cp1257_WinBaltic to Unicode table # Unicode version: 2.0 # Table version: 2.00 # Table format: Format A -# Date: 04/15/98 -# -# Contact: cpxlate@microsoft.com -# +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1257 code (in hex) +# Column #1 is the cp1257_WinBaltic code (in hex) # Column #2 is the Unicode (in hex as 0xXXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # -# The entries are in cp1257 order +# The entries are in cp1257_WinBaltic order # -################## - -0x20-0x7e idem -# -0x80 U+20AC #EURO SIGN -0x81 #UNDEFINED -0x82 U+201A #SINGLE LOW-9 QUOTATION MARK -0x83 #UNDEFINED -0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +#0x00 U+0000 #NULL +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0A U+000A #LINE FEED +#0x0B U+000B #VERTICAL TABULATION +#0x0C U+000C #FORM FEED +#0x0D U+000D #CARRIAGE RETURN +#0x0E U+000E #SHIFT OUT +#0x0F U+000F #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1A U+001A #SUBSTITUTE +#0x1B U+001B #ESCAPE +#0x1C U+001C #FILE SEPARATOR +#0x1D U+001D #GROUP SEPARATOR +#0x1E U+001E #RECORD SEPARATOR +#0x1F U+001F #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2A U+002A #ASTERISK +0x2B U+002B #PLUS SIGN +0x2C U+002C #COMMA +0x2D U+002D #HYPHEN-MINUS +0x2E U+002E #FULL STOP +0x2F U+002F #SOLIDUS +0x30 U+0030 #DIGIT 0 +0x31 U+0031 #DIGIT 1 +0x32 U+0032 #DIGIT 2 +0x33 U+0033 #DIGIT 3 +0x34 U+0034 #DIGIT 4 +0x35 U+0035 #DIGIT 5 +0x36 U+0036 #DIGIT 6 +0x37 U+0037 #DIGIT 7 +0x38 U+0038 #DIGIT 8 +0x39 U+0039 #DIGIT 9 +0x3A U+003A #COLON +0x3B U+003B #SEMICOLON +0x3C U+003C #LESS-THAN SIGN +0x3D U+003D #EQUALS SIGN +0x3E U+003E #GREATER-THAN SIGN +0x3F U+003F #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL A +0x42 U+0042 #LATIN CAPITAL B +0x43 U+0043 #LATIN CAPITAL C +0x44 U+0044 #LATIN CAPITAL D +0x45 U+0045 #LATIN CAPITAL E +0x46 U+0046 #LATIN CAPITAL F +0x47 U+0047 #LATIN CAPITAL G +0x48 U+0048 #LATIN CAPITAL H +0x49 U+0049 #LATIN CAPITAL I +0x4A U+004A #LATIN CAPITAL J +0x4B U+004B #LATIN CAPITAL K +0x4C U+004C #LATIN CAPITAL L +0x4D U+004D #LATIN CAPITAL M +0x4E U+004E #LATIN CAPITAL N +0x4F U+004F #LATIN CAPITAL O +0x50 U+0050 #LATIN CAPITAL P +0x51 U+0051 #LATIN CAPITAL Q +0x52 U+0052 #LATIN CAPITAL R +0x53 U+0053 #LATIN CAPITAL S +0x54 U+0054 #LATIN CAPITAL T +0x55 U+0055 #LATIN CAPITAL U +0x56 U+0056 #LATIN CAPITAL V +0x57 U+0057 #LATIN CAPITAL W +0x58 U+0058 #LATIN CAPITAL X +0x59 U+0059 #LATIN CAPITAL Y +0x5A U+005A #LATIN CAPITAL Z +0x5B U+005B #LEFT SQUARE BRACKET +0x5C U+005C #BACKSLASH +0x5D U+005D #RIGHT SQUARE BRACKET +0x5E U+005E #CIRCUMFLEX +0x5F U+005F #LOW LINE +0x60 U+0060 #GRAVE +0x61 U+0061 #LATIN SMALL A +0x62 U+0062 #LATIN SMALL B +0x63 U+0063 #LATIN SMALL C +0x64 U+0064 #LATIN SMALL D +0x65 U+0065 #LATIN SMALL E +0x66 U+0066 #LATIN SMALL F +0x67 U+0067 #LATIN SMALL G +0x68 U+0068 #LATIN SMALL H +0x69 U+0069 #LATIN SMALL I +0x6A U+006A #LATIN SMALL J +0x6B U+006B #LATIN SMALL K +0x6C U+006C #LATIN SMALL L +0x6D U+006D #LATIN SMALL M +0x6E U+006E #LATIN SMALL N +0x6F U+006F #LATIN SMALL O +0x70 U+0070 #LATIN SMALL P +0x71 U+0071 #LATIN SMALL Q +0x72 U+0072 #LATIN SMALL R +0x73 U+0073 #LATIN SMALL S +0x74 U+0074 #LATIN SMALL T +0x75 U+0075 #LATIN SMALL U +0x76 U+0076 #LATIN SMALL V +0x77 U+0077 #LATIN SMALL W +0x78 U+0078 #LATIN SMALL X +0x79 U+0079 #LATIN SMALL Y +0x7A U+007A #LATIN SMALL Z +0x7B U+007B #LEFT CURLY BRACKET +0x7C U+007C #VERTICAL LINE +0x7D U+007D #RIGHT CURLY BRACKET +0x7E U+007E #TILDE +#0x7F U+007F #DELETE +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +#0x83 #UNDEFINED +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 #UNDEFINED +#0x88 #UNDEFINED 0x89 U+2030 #PER MILLE SIGN -0x8A #UNDEFINED -0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C #UNDEFINED +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMENT +#0x8C #UNDEFINED 0x8D U+00A8 #DIAERESIS -0x8E U+02C7 #CARON +0x8E U+02C7 #HACEK 0x8F U+00B8 #CEDILLA -0x90 #UNDEFINED +#0x90 #UNDEFINED 0x91 U+2018 #LEFT SINGLE QUOTATION MARK 0x92 U+2019 #RIGHT SINGLE QUOTATION MARK 0x93 U+201C #LEFT DOUBLE QUOTATION MARK @@ -50,108 +172,107 @@ OBaltic Rim (windows-1257) 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH -0x98 #UNDEFINED +#0x98 #UNDEFINED 0x99 U+2122 #TRADE MARK SIGN -0x9A #UNDEFINED -0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C #UNDEFINED +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMENT +#0x9C #UNDEFINED 0x9D U+00AF #MACRON 0x9E U+02DB #OGONEK -0x9F #UNDEFINED +#0x9F #UNDEFINED 0xA0 U+00A0 #NO-BREAK SPACE -0xA1 #UNDEFINED +#0xA1 #UNDEFINED 0xA2 U+00A2 #CENT SIGN 0xA3 U+00A3 #POUND SIGN 0xA4 U+00A4 #CURRENCY SIGN -0xA5 #UNDEFINED +#0xA5 #UNDEFINED 0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00D8 #LATIN CAPITAL LETTER O WITH STROKE +0xA8 U+00D8 #LATIN CAPITAL O STROKE 0xA9 U+00A9 #COPYRIGHT SIGN -0xAA U+0156 #LATIN CAPITAL LETTER R WITH CEDILLA -0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAA U+0156 #LATIN CAPITAL R CEDILLA +0xAB U+00AB #LEFT POINTING GUILLEMENT 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN 0xAE U+00AE #REGISTERED SIGN -0xAF U+00C6 #LATIN CAPITAL LETTER AE +0xAF U+00C6 #LATIN CAPITAL AE LIGATURE 0xB0 U+00B0 #DEGREE SIGN 0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+00B2 #SUPERSCRIPT TWO -0xB3 U+00B3 #SUPERSCRIPT THREE -0xB4 U+00B4 #ACUTE ACCENT +0xB2 U+00B2 #SUPERSCRIPT 2 +0xB3 U+00B3 #SUPERSCRIPT 3 +0xB4 U+00B4 #ACUTE 0xB5 U+00B5 #MICRO SIGN 0xB6 U+00B6 #PILCROW SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00F8 #LATIN SMALL LETTER O WITH STROKE +0xB8 U+00F8 #LATIN SAMLL O STROKE 0xB9 U+00B9 #SUPERSCRIPT ONE -0xBA U+0157 #LATIN SMALL LETTER R WITH CEDILLA -0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+00BC #VULGAR FRACTION ONE QUARTER -0xBD U+00BD #VULGAR FRACTION ONE HALF -0xBE U+00BE #VULGAR FRACTION THREE QUARTERS -0xBF U+00E6 #LATIN SMALL LETTER AE -0xC0 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK -0xC1 U+012E #LATIN CAPITAL LETTER I WITH OGONEK -0xC2 U+0100 #LATIN CAPITAL LETTER A WITH MACRON -0xC3 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0xC4 U+00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0xC5 U+00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE -0xC6 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK -0xC7 U+0112 #LATIN CAPITAL LETTER E WITH MACRON -0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON -0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE -0xCA U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE -0xCB U+0116 #LATIN CAPITAL LETTER E WITH DOT ABOVE -0xCC U+0122 #LATIN CAPITAL LETTER G WITH CEDILLA -0xCD U+0136 #LATIN CAPITAL LETTER K WITH CEDILLA -0xCE U+012A #LATIN CAPITAL LETTER I WITH MACRON -0xCF U+013B #LATIN CAPITAL LETTER L WITH CEDILLA -0xD0 U+0160 #LATIN CAPITAL LETTER S WITH CARON -0xD1 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE -0xD2 U+0145 #LATIN CAPITAL LETTER N WITH CEDILLA -0xD3 U+00D3 #LATIN CAPITAL LETTER O WITH ACUTE -0xD4 U+014C #LATIN CAPITAL LETTER O WITH MACRON -0xD5 U+00D5 #LATIN CAPITAL LETTER O WITH TILDE -0xD6 U+00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xBA U+0157 #LATIN SMALL R CEDILLA +0xBB U+00BB #RIGHT POINTING GUILLEMENT +0xBC U+00BC #FRACTION 1/4 +0xBD U+00BD #FRACTION 1/2 +0xBE U+00BE #FRACTION 3/4 +0xBF U+00E6 #LATIN SMALL AE LIGATURE +0xC0 U+0104 #LATIN CAPITAL A OGONEK +0xC1 U+012E #LATIN CAPITAL I OGONEK +0xC2 U+0100 #LATIN CAPITAL A MACRON +0xC3 U+0106 #LATIN CAPITAL C ACUTE +0xC4 U+00C4 #LATIN CAPITAL A DIAERESIS +0xC5 U+00C5 #LATIN CAPITAL A RING ABOVE +0xC6 U+0118 #LATIN CAPITAL E OGONEK +0xC7 U+0112 #LATIN CAPITAL E MACRON +0xC8 U+010C #LATIN CAPITAL C HACEK +0xC9 U+00C9 #LATIN CAPITAL E ACUTE +0xCA U+0179 #LATIN CAPITAL Z ACUTE +0xCB U+0116 #LATIN CAPITAL E DOT ABOVE +0xCC U+0122 #LATIN CAPITAL G CEDILLA +0xCD U+0136 #LATIN CAPITAL K CEDILLA +0xCE U+012A #LATIN CAPITAL I MACRON +0xCF U+013B #LATIN CAPITAL L CEDILLA +0xD0 U+0160 #LATIN CAPITAL S HACEK +0xD1 U+0143 #LATIN CAPITAL N ACUTE +0xD2 U+0145 #LATIN CAPITAL N CEDILLA +0xD3 U+00D3 #LATIN CAPITAL O ACUTE +0xD4 U+014C #LATIN CAPITAL O MACRON +0xD5 U+00D5 #LATIN CAPITAL O TILDE +0xD6 U+00D6 #LATIN CAPITAL O DIAERESIS 0xD7 U+00D7 #MULTIPLICATION SIGN -0xD8 U+0172 #LATIN CAPITAL LETTER U WITH OGONEK -0xD9 U+0141 #LATIN CAPITAL LETTER L WITH STROKE -0xDA U+015A #LATIN CAPITAL LETTER S WITH ACUTE -0xDB U+016A #LATIN CAPITAL LETTER U WITH MACRON -0xDC U+00DC #LATIN CAPITAL LETTER U WITH DIAERESIS -0xDD U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xDE U+017D #LATIN CAPITAL LETTER Z WITH CARON -0xDF U+00DF #LATIN SMALL LETTER SHARP S -0xE0 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xE1 U+012F #LATIN SMALL LETTER I WITH OGONEK -0xE2 U+0101 #LATIN SMALL LETTER A WITH MACRON -0xE3 U+0107 #LATIN SMALL LETTER C WITH ACUTE -0xE4 U+00E4 #LATIN SMALL LETTER A WITH DIAERESIS -0xE5 U+00E5 #LATIN SMALL LETTER A WITH RING ABOVE -0xE6 U+0119 #LATIN SMALL LETTER E WITH OGONEK -0xE7 U+0113 #LATIN SMALL LETTER E WITH MACRON -0xE8 U+010D #LATIN SMALL LETTER C WITH CARON -0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE -0xEA U+017A #LATIN SMALL LETTER Z WITH ACUTE -0xEB U+0117 #LATIN SMALL LETTER E WITH DOT ABOVE -0xEC U+0123 #LATIN SMALL LETTER G WITH CEDILLA -0xED U+0137 #LATIN SMALL LETTER K WITH CEDILLA -0xEE U+012B #LATIN SMALL LETTER I WITH MACRON -0xEF U+013C #LATIN SMALL LETTER L WITH CEDILLA -0xF0 U+0161 #LATIN SMALL LETTER S WITH CARON -0xF1 U+0144 #LATIN SMALL LETTER N WITH ACUTE -0xF2 U+0146 #LATIN SMALL LETTER N WITH CEDILLA -0xF3 U+00F3 #LATIN SMALL LETTER O WITH ACUTE -0xF4 U+014D #LATIN SMALL LETTER O WITH MACRON -0xF5 U+00F5 #LATIN SMALL LETTER O WITH TILDE -0xF6 U+00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xD8 U+0172 #LATIN CAPITAL U OGONEK +0xD9 U+0141 #LATIN CAPITAL L STROKE +0xDA U+015A #LATIN CAPITAL S ACUTE +0xDB U+016A #LATIN CAPITAL U MACRON +0xDC U+00DC #LATIN CAPITAL U DIAERESIS +0xDD U+017B #LATIN CAPITAL Z DOT ABOVE +0xDE U+017D #LATIN CAPITAL Z HACEK +0xDF U+00DF #LATIN SMALL SHARP SS +0xE0 U+0105 #LATIN SMALL A OGONEK +0xE1 U+012F #LATIN SMALL I OGONEK +0xE2 U+0101 #LATIN SMALL A MACRON +0xE3 U+0107 #LATIN SMALL C ACUTE +0xE4 U+00E4 #LATIN SMALL A DIAERESIS +0xE5 U+00E5 #LATIN SMALL A RING ABOVE +0xE6 U+0119 #LATIN SMALL E OGONEK +0xE7 U+0113 #LATIN SMALL E MACRON +0xE8 U+010D #LATIN SMALL C HACEK +0xE9 U+00E9 #LATIN SMALL E ACUTE +0xEA U+017A #LATIN SMALL Z ACUTE +0xEB U+0117 #LATIN SMALL E DOT ABOVE +0xEC U+0123 #LATIN SMALL G CEDILLA +0xED U+0137 #LATIN SMALL K CEDILLA +0xEE U+012B #LATIN SMALL I MACRON +0xEF U+013C #LATIN SMALL L CEDILLA +0xF0 U+0161 #LATIN SMALL S HACEK +0xF1 U+0144 #LATIN SMALL N ACUTE +0xF2 U+0146 #LATIN SMALL N CEDILLA +0xF3 U+00F3 #LATIN SMALL O ACUTE +0xF4 U+014D #LATIN SMALL O MACRON +0xF5 U+00F5 #LATIN SMALL O TILDE +0xF6 U+00F6 #LATIN SMALL O DIAERESIS 0xF7 U+00F7 #DIVISION SIGN -0xF8 U+0173 #LATIN SMALL LETTER U WITH OGONEK -0xF9 U+0142 #LATIN SMALL LETTER L WITH STROKE -0xFA U+015B #LATIN SMALL LETTER S WITH ACUTE -0xFB U+016B #LATIN SMALL LETTER U WITH MACRON -0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS -0xFD U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE -0xFE U+017E #LATIN SMALL LETTER Z WITH CARON +0xF8 U+0173 #LATIN SMALL U OGONEK +0xF9 U+0142 #LATIN SMALL L STROKE +0xFA U+015B #LATIN SMALL S ACUTE +0xFB U+016B #LATIN SMALL U MACRON +0xFC U+00FC #LATIN SMALL U DIAERESIS +0xFD U+017C #LATIN SMALL Z DOT ABOVE +0xFE U+017E #LATIN SMALL Z HACEK 0xFF U+02D9 #DOT ABOVE - diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl index b84f503f..621e730e 100644 --- a/src/chrtrans/cp437_uni.tbl +++ b/src/chrtrans/cp437_uni.tbl @@ -6,21 +6,20 @@ D0 Mcp437 #Name as a Display Charset (used on Options screen) -OIBM PC US codepage (cp437) - +ODosLatinUS (cp437) # # Name: cp437_DOSLatinUS to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 +# Unicode version: 1.1 +# Table version: 1.1 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. Chang <a-kchang@microsoft.com> +# Date: 03/31/95 +# Authors: Michel Suignard <michelsu@microsoft.com> +# Lori Hoerth <lorih@microsoft.com> # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp437_DOSLatinUS code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #1 is the cp437_DosLatinUS code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # # The entries are in cp437_DosLatinUS order @@ -28,8 +27,6 @@ OIBM PC US codepage (cp437) # some mapppings of greek letters to latin letters added, # just for fun.. - KW # -####################################### - 0x20-0x7f idem # 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA diff --git a/src/chrtrans/cp737_uni.tbl b/src/chrtrans/cp737_uni.tbl index 81442ccc..32f01c8c 100644 --- a/src/chrtrans/cp737_uni.tbl +++ b/src/chrtrans/cp737_uni.tbl @@ -2,7 +2,7 @@ Mcp737 #Name as a Display Charset (used on Options screen) -OGreek (cp737) +ODosGreek (cp737) # # Name: cp737_DOSGreek to Unicode table diff --git a/src/chrtrans/cp775_uni.tbl b/src/chrtrans/cp775_uni.tbl index 4d63208b..02a8b61b 100644 --- a/src/chrtrans/cp775_uni.tbl +++ b/src/chrtrans/cp775_uni.tbl @@ -2,7 +2,7 @@ Mcp775 #Name as a Display Charset (used on Options screen) -OBaltic Rim (cp775) +ODosBaltRim (cp775) # Name: cp775_DOSBaltRim to Unicode table # Unicode version: 2.0 @@ -20,10 +20,134 @@ OBaltic Rim (cp775) # # The entries are in cp775_DOSBaltRim order # -################## - -0x20-0x7e idem -# +#0x00 U+0000 #NULL +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0a U+000a #LINE FEED +#0x0b U+000b #VERTICAL TABULATION +#0x0c U+000c #FORM FEED +#0x0d U+000d #CARRIAGE RETURN +#0x0e U+000e #SHIFT OUT +#0x0f U+000f #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1a U+001a #SUBSTITUTE +#0x1b U+001b #ESCAPE +#0x1c U+001c #FILE SEPARATOR +#0x1d U+001d #GROUP SEPARATOR +#0x1e U+001e #RECORD SEPARATOR +#0x1f U+001f #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2a U+002a #ASTERISK +0x2b U+002b #PLUS SIGN +0x2c U+002c #COMMA +0x2d U+002d #HYPHEN-MINUS +0x2e U+002e #FULL STOP +0x2f U+002f #SOLIDUS +0x30 U+0030 #DIGIT ZERO +0x31 U+0031 #DIGIT ONE +0x32 U+0032 #DIGIT TWO +0x33 U+0033 #DIGIT THREE +0x34 U+0034 #DIGIT FOUR +0x35 U+0035 #DIGIT FIVE +0x36 U+0036 #DIGIT SIX +0x37 U+0037 #DIGIT SEVEN +0x38 U+0038 #DIGIT EIGHT +0x39 U+0039 #DIGIT NINE +0x3a U+003a #COLON +0x3b U+003b #SEMICOLON +0x3c U+003c #LESS-THAN SIGN +0x3d U+003d #EQUALS SIGN +0x3e U+003e #GREATER-THAN SIGN +0x3f U+003f #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL LETTER A +0x42 U+0042 #LATIN CAPITAL LETTER B +0x43 U+0043 #LATIN CAPITAL LETTER C +0x44 U+0044 #LATIN CAPITAL LETTER D +0x45 U+0045 #LATIN CAPITAL LETTER E +0x46 U+0046 #LATIN CAPITAL LETTER F +0x47 U+0047 #LATIN CAPITAL LETTER G +0x48 U+0048 #LATIN CAPITAL LETTER H +0x49 U+0049 #LATIN CAPITAL LETTER I +0x4a U+004a #LATIN CAPITAL LETTER J +0x4b U+004b #LATIN CAPITAL LETTER K +0x4c U+004c #LATIN CAPITAL LETTER L +0x4d U+004d #LATIN CAPITAL LETTER M +0x4e U+004e #LATIN CAPITAL LETTER N +0x4f U+004f #LATIN CAPITAL LETTER O +0x50 U+0050 #LATIN CAPITAL LETTER P +0x51 U+0051 #LATIN CAPITAL LETTER Q +0x52 U+0052 #LATIN CAPITAL LETTER R +0x53 U+0053 #LATIN CAPITAL LETTER S +0x54 U+0054 #LATIN CAPITAL LETTER T +0x55 U+0055 #LATIN CAPITAL LETTER U +0x56 U+0056 #LATIN CAPITAL LETTER V +0x57 U+0057 #LATIN CAPITAL LETTER W +0x58 U+0058 #LATIN CAPITAL LETTER X +0x59 U+0059 #LATIN CAPITAL LETTER Y +0x5a U+005a #LATIN CAPITAL LETTER Z +0x5b U+005b #LEFT SQUARE BRACKET +0x5c U+005c #REVERSE SOLIDUS +0x5d U+005d #RIGHT SQUARE BRACKET +0x5e U+005e #CIRCUMFLEX ACCENT +0x5f U+005f #LOW LINE +0x60 U+0060 #GRAVE ACCENT +0x61 U+0061 #LATIN SMALL LETTER A +0x62 U+0062 #LATIN SMALL LETTER B +0x63 U+0063 #LATIN SMALL LETTER C +0x64 U+0064 #LATIN SMALL LETTER D +0x65 U+0065 #LATIN SMALL LETTER E +0x66 U+0066 #LATIN SMALL LETTER F +0x67 U+0067 #LATIN SMALL LETTER G +0x68 U+0068 #LATIN SMALL LETTER H +0x69 U+0069 #LATIN SMALL LETTER I +0x6a U+006a #LATIN SMALL LETTER J +0x6b U+006b #LATIN SMALL LETTER K +0x6c U+006c #LATIN SMALL LETTER L +0x6d U+006d #LATIN SMALL LETTER M +0x6e U+006e #LATIN SMALL LETTER N +0x6f U+006f #LATIN SMALL LETTER O +0x70 U+0070 #LATIN SMALL LETTER P +0x71 U+0071 #LATIN SMALL LETTER Q +0x72 U+0072 #LATIN SMALL LETTER R +0x73 U+0073 #LATIN SMALL LETTER S +0x74 U+0074 #LATIN SMALL LETTER T +0x75 U+0075 #LATIN SMALL LETTER U +0x76 U+0076 #LATIN SMALL LETTER V +0x77 U+0077 #LATIN SMALL LETTER W +0x78 U+0078 #LATIN SMALL LETTER X +0x79 U+0079 #LATIN SMALL LETTER Y +0x7a U+007a #LATIN SMALL LETTER Z +0x7b U+007b #LEFT CURLY BRACKET +0x7c U+007c #VERTICAL LINE +0x7d U+007d #RIGHT CURLY BRACKET +0x7e U+007e #TILDE +#0x7f U+007f #DELETE 0x80 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE 0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS 0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl index af4dbff5..05685971 100644 --- a/src/chrtrans/cp850_uni.tbl +++ b/src/chrtrans/cp850_uni.tbl @@ -8,29 +8,153 @@ D0 Mcp850 #Name as a Display Charset (used on Options screen) -OWestern (cp850) +ODosLatin1 (cp850) # # Name: cp850_DOSLatin1 to Unicode table -# Unicode version: 2.0 -# Table version: 2.00 +# Unicode version: 1.1 +# Table version: 1.1 # Table format: Format A -# Date: 04/24/96 -# Authors: Lori Brownell <loribr@microsoft.com> -# K.D. Chang <a-kchang@microsoft.com> +# Date: 03/31/95 +# Authors: Michel Suignard <michelsu@microsoft.com> +# Lori Hoerth <lorih@microsoft.com> # General notes: none # # Format: Three tab-separated columns # Column #1 is the cp850_DOSLatin1 code (in hex) -# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #2 is the Unicode (in hex as U+XXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # # The entries are in cp850_DOSLatin1 order # -################## - -0x20-0x7e idem -# +#0x00 U+0000 #NULL +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0a U+000a #LINE FEED +#0x0b U+000b #VERTICAL TABULATION +#0x0c U+000c #FORM FEED +#0x0d U+000d #CARRIAGE RETURN +#0x0e U+000e #SHIFT OUT +#0x0f U+000f #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1a U+001a #SUBSTITUTE +#0x1b U+001b #ESCAPE +#0x1c U+001c #FILE SEPARATOR +#0x1d U+001d #GROUP SEPARATOR +#0x1e U+001e #RECORD SEPARATOR +#0x1f U+001f #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2a U+002a #ASTERISK +0x2b U+002b #PLUS SIGN +0x2c U+002c #COMMA +0x2d U+002d #HYPHEN-MINUS +0x2e U+002e #FULL STOP +0x2f U+002f #SOLIDUS +0x30 U+0030 #DIGIT ZERO +0x31 U+0031 #DIGIT ONE +0x32 U+0032 #DIGIT TWO +0x33 U+0033 #DIGIT THREE +0x34 U+0034 #DIGIT FOUR +0x35 U+0035 #DIGIT FIVE +0x36 U+0036 #DIGIT SIX +0x37 U+0037 #DIGIT SEVEN +0x38 U+0038 #DIGIT EIGHT +0x39 U+0039 #DIGIT NINE +0x3a U+003a #COLON +0x3b U+003b #SEMICOLON +0x3c U+003c #LESS-THAN SIGN +0x3d U+003d #EQUALS SIGN +0x3e U+003e #GREATER-THAN SIGN +0x3f U+003f #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL LETTER A +0x42 U+0042 #LATIN CAPITAL LETTER B +0x43 U+0043 #LATIN CAPITAL LETTER C +0x44 U+0044 #LATIN CAPITAL LETTER D +0x45 U+0045 #LATIN CAPITAL LETTER E +0x46 U+0046 #LATIN CAPITAL LETTER F +0x47 U+0047 #LATIN CAPITAL LETTER G +0x48 U+0048 #LATIN CAPITAL LETTER H +0x49 U+0049 #LATIN CAPITAL LETTER I +0x4a U+004a #LATIN CAPITAL LETTER J +0x4b U+004b #LATIN CAPITAL LETTER K +0x4c U+004c #LATIN CAPITAL LETTER L +0x4d U+004d #LATIN CAPITAL LETTER M +0x4e U+004e #LATIN CAPITAL LETTER N +0x4f U+004f #LATIN CAPITAL LETTER O +0x50 U+0050 #LATIN CAPITAL LETTER P +0x51 U+0051 #LATIN CAPITAL LETTER Q +0x52 U+0052 #LATIN CAPITAL LETTER R +0x53 U+0053 #LATIN CAPITAL LETTER S +0x54 U+0054 #LATIN CAPITAL LETTER T +0x55 U+0055 #LATIN CAPITAL LETTER U +0x56 U+0056 #LATIN CAPITAL LETTER V +0x57 U+0057 #LATIN CAPITAL LETTER W +0x58 U+0058 #LATIN CAPITAL LETTER X +0x59 U+0059 #LATIN CAPITAL LETTER Y +0x5a U+005a #LATIN CAPITAL LETTER Z +0x5b U+005b #LEFT SQUARE BRACKET +0x5c U+005c #REVERSE SOLIDUS +0x5d U+005d #RIGHT SQUARE BRACKET +0x5e U+005e #CIRCUMFLEX ACCENT +0x5f U+005f #LOW LINE +0x60 U+0060 #GRAVE ACCENT +0x61 U+0061 #LATIN SMALL LETTER A +0x62 U+0062 #LATIN SMALL LETTER B +0x63 U+0063 #LATIN SMALL LETTER C +0x64 U+0064 #LATIN SMALL LETTER D +0x65 U+0065 #LATIN SMALL LETTER E +0x66 U+0066 #LATIN SMALL LETTER F +0x67 U+0067 #LATIN SMALL LETTER G +0x68 U+0068 #LATIN SMALL LETTER H +0x69 U+0069 #LATIN SMALL LETTER I +0x6a U+006a #LATIN SMALL LETTER J +0x6b U+006b #LATIN SMALL LETTER K +0x6c U+006c #LATIN SMALL LETTER L +0x6d U+006d #LATIN SMALL LETTER M +0x6e U+006e #LATIN SMALL LETTER N +0x6f U+006f #LATIN SMALL LETTER O +0x70 U+0070 #LATIN SMALL LETTER P +0x71 U+0071 #LATIN SMALL LETTER Q +0x72 U+0072 #LATIN SMALL LETTER R +0x73 U+0073 #LATIN SMALL LETTER S +0x74 U+0074 #LATIN SMALL LETTER T +0x75 U+0075 #LATIN SMALL LETTER U +0x76 U+0076 #LATIN SMALL LETTER V +0x77 U+0077 #LATIN SMALL LETTER W +0x78 U+0078 #LATIN SMALL LETTER X +0x79 U+0079 #LATIN SMALL LETTER Y +0x7a U+007a #LATIN SMALL LETTER Z +0x7b U+007b #LEFT CURLY BRACKET +0x7c U+007c #VERTICAL LINE +0x7d U+007d #RIGHT CURLY BRACKET +0x7e U+007e #TILDE +0x7f U+007f #DELETE 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA 0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS 0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE diff --git a/src/chrtrans/cp852_uni.tbl b/src/chrtrans/cp852_uni.tbl index 946b4996..0658d893 100644 --- a/src/chrtrans/cp852_uni.tbl +++ b/src/chrtrans/cp852_uni.tbl @@ -2,7 +2,7 @@ Mcp852 #Name as a Display Charset (used on Options screen) -OEastern European (cp852) +ODosLatin2 (cp852) # # Name: cp852_DOSLatin2 to Unicode table @@ -21,136 +21,260 @@ OEastern European (cp852) # # The entries are in cp852_DOSLatin2 order # -################## - -0x20-0x7e idem -# -0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA -0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS -0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE -0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX -0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS -0x85 U+016f #LATIN SMALL LETTER U WITH RING ABOVE -0x86 U+0107 #LATIN SMALL LETTER C WITH ACUTE -0x87 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA -0x88 U+0142 #LATIN SMALL LETTER L WITH STROKE -0x89 U+00eb #LATIN SMALL LETTER E WITH DIAERESIS -0x8a U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0x8b U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE -0x8c U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX -0x8d U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE -0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS -0x8f U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE -0x91 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE -0x92 U+013a #LATIN SMALL LETTER L WITH ACUTE -0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX -0x94 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS -0x95 U+013d #LATIN CAPITAL LETTER L WITH CARON -0x96 U+013e #LATIN SMALL LETTER L WITH CARON -0x97 U+015a #LATIN CAPITAL LETTER S WITH ACUTE -0x98 U+015b #LATIN SMALL LETTER S WITH ACUTE -0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS -0x9b U+0164 #LATIN CAPITAL LETTER T WITH CARON -0x9c U+0165 #LATIN SMALL LETTER T WITH CARON -0x9d U+0141 #LATIN CAPITAL LETTER L WITH STROKE -0x9e U+00d7 #MULTIPLICATION SIGN -0x9f U+010d #LATIN SMALL LETTER C WITH CARON -0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE -0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE -0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE -0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE -0xa4 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK -0xa5 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xa6 U+017d #LATIN CAPITAL LETTER Z WITH CARON -0xa7 U+017e #LATIN SMALL LETTER Z WITH CARON -0xa8 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK -0xa9 U+0119 #LATIN SMALL LETTER E WITH OGONEK -0xaa U+00ac #NOT SIGN -0xab U+017a #LATIN SMALL LETTER Z WITH ACUTE -0xac U+010c #LATIN CAPITAL LETTER C WITH CARON -0xad U+015f #LATIN SMALL LETTER S WITH CEDILLA -0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xb0 U+2591 #LIGHT SHADE -0xb1 U+2592 #MEDIUM SHADE -0xb2 U+2593 #DARK SHADE -0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL -0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT -0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE -0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX -0xb7 U+011a #LATIN CAPITAL LETTER E WITH CARON -0xb8 U+015e #LATIN CAPITAL LETTER S WITH CEDILLA -0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT -0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL -0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT -0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT -0xbd U+017b #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xbe U+017c #LATIN SMALL LETTER Z WITH DOT ABOVE -0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT -0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT -0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL -0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL -0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT -0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL -0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL -0xc6 U+0102 #LATIN CAPITAL LETTER A WITH BREVE -0xc7 U+0103 #LATIN SMALL LETTER A WITH BREVE -0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT -0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT -0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL -0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL -0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT -0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL -0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL -0xcf U+00a4 #CURRENCY SIGN -0xd0 U+0111 #LATIN SMALL LETTER D WITH STROKE -0xd1 U+0110 #LATIN CAPITAL LETTER D WITH STROKE -0xd2 U+010e #LATIN CAPITAL LETTER D WITH CARON -0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS -0xd4 U+010f #LATIN SMALL LETTER D WITH CARON -0xd5 U+0147 #LATIN CAPITAL LETTER N WITH CARON -0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE -0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xd8 U+011b #LATIN SMALL LETTER E WITH CARON -0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT -0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT -0xdb U+2588 #FULL BLOCK -0xdc U+2584 #LOWER HALF BLOCK -0xdd U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA -0xde U+016e #LATIN CAPITAL LETTER U WITH RING ABOVE -0xdf U+2580 #UPPER HALF BLOCK -0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE -0xe1 U+00df #LATIN SMALL LETTER SHARP S -0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX -0xe3 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE -0xe4 U+0144 #LATIN SMALL LETTER N WITH ACUTE -0xe5 U+0148 #LATIN SMALL LETTER N WITH CARON -0xe6 U+0160 #LATIN CAPITAL LETTER S WITH CARON -0xe7 U+0161 #LATIN SMALL LETTER S WITH CARON -0xe8 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE -0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE -0xea U+0155 #LATIN SMALL LETTER R WITH ACUTE -0xeb U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE -0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE -0xee U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xef U+00b4 #ACUTE ACCENT -0xf0 U+00ad #SOFT HYPHEN -0xf1 U+02dd #DOUBLE ACUTE ACCENT -0xf2 U+02db #OGONEK -0xf3 U+02c7 #CARON -0xf4 U+02d8 #BREVE -0xf5 U+00a7 #SECTION SIGN -0xf6 U+00f7 #DIVISION SIGN -0xf7 U+00b8 #CEDILLA -0xf8 U+00b0 #DEGREE SIGN -0xf9 U+00a8 #DIAERESIS -0xfa U+02d9 #DOT ABOVE -0xfb U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE -0xfc U+0158 #LATIN CAPITAL LETTER R WITH CARON -0xfd U+0159 #LATIN SMALL LETTER R WITH CARON -0xfe U+25a0 #BLACK SQUARE -0xff U+00a0 #NO-BREAK SPACE +#0x00 U+0000 #NULL +#0x01 U+0001 #START OF HEADING +#0x02 U+0002 #START OF TEXT +#0x03 U+0003 #END OF TEXT +#0x04 U+0004 #END OF TRANSMISSION +#0x05 U+0005 #ENQUIRY +#0x06 U+0006 #ACKNOWLEDGE +#0x07 U+0007 #BELL +#0x08 U+0008 #BACKSPACE +#0x09 U+0009 #HORIZONTAL TABULATION +#0x0a U+000a #LINE FEED +#0x0b U+000b #VERTICAL TABULATION +#0x0c U+000c #FORM FEED +#0x0d U+000d #CARRIAGE RETURN +#0x0e U+000e #SHIFT OUT +#0x0f U+000f #SHIFT IN +#0x10 U+0010 #DATA LINK ESCAPE +#0x11 U+0011 #DEVICE CONTROL ONE +#0x12 U+0012 #DEVICE CONTROL TWO +#0x13 U+0013 #DEVICE CONTROL THREE +#0x14 U+0014 #DEVICE CONTROL FOUR +#0x15 U+0015 #NEGATIVE ACKNOWLEDGE +#0x16 U+0016 #SYNCHRONOUS IDLE +#0x17 U+0017 #END OF TRANSMISSION BLOCK +#0x18 U+0018 #CANCEL +#0x19 U+0019 #END OF MEDIUM +#0x1a U+001a #SUBSTITUTE +#0x1b U+001b #ESCAPE +#0x1c U+001c #FILE SEPARATOR +#0x1d U+001d #GROUP SEPARATOR +#0x1e U+001e #RECORD SEPARATOR +#0x1f U+001f #UNIT SEPARATOR +0x20 U+0020 #SPACE +0x21 U+0021 #EXCLAMATION MARK +0x22 U+0022 #QUOTATION MARK +0x23 U+0023 #NUMBER SIGN +0x24 U+0024 #DOLLAR SIGN +0x25 U+0025 #PERCENT SIGN +0x26 U+0026 #AMPERSAND +0x27 U+0027 #APOSTROPHE +0x28 U+0028 #LEFT PARENTHESIS +0x29 U+0029 #RIGHT PARENTHESIS +0x2a U+002a #ASTERISK +0x2b U+002b #PLUS SIGN +0x2c U+002c #COMMA +0x2d U+002d #HYPHEN-MINUS +0x2e U+002e #FULL STOP +0x2f U+002f #SOLIDUS +0x30 U+0030 #DIGIT ZERO +0x31 U+0031 #DIGIT ONE +0x32 U+0032 #DIGIT TWO +0x33 U+0033 #DIGIT THREE +0x34 U+0034 #DIGIT FOUR +0x35 U+0035 #DIGIT FIVE +0x36 U+0036 #DIGIT SIX +0x37 U+0037 #DIGIT SEVEN +0x38 U+0038 #DIGIT EIGHT +0x39 U+0039 #DIGIT NINE +0x3a U+003a #COLON +0x3b U+003b #SEMICOLON +0x3c U+003c #LESS-THAN SIGN +0x3d U+003d #EQUALS SIGN +0x3e U+003e #GREATER-THAN SIGN +0x3f U+003f #QUESTION MARK +0x40 U+0040 #COMMERCIAL AT +0x41 U+0041 #LATIN CAPITAL LETTER A +0x42 U+0042 #LATIN CAPITAL LETTER B +0x43 U+0043 #LATIN CAPITAL LETTER C +0x44 U+0044 #LATIN CAPITAL LETTER D +0x45 U+0045 #LATIN CAPITAL LETTER E +0x46 U+0046 #LATIN CAPITAL LETTER F +0x47 U+0047 #LATIN CAPITAL LETTER G +0x48 U+0048 #LATIN CAPITAL LETTER H +0x49 U+0049 #LATIN CAPITAL LETTER I +0x4a U+004a #LATIN CAPITAL LETTER J +0x4b U+004b #LATIN CAPITAL LETTER K +0x4c U+004c #LATIN CAPITAL LETTER L +0x4d U+004d #LATIN CAPITAL LETTER M +0x4e U+004e #LATIN CAPITAL LETTER N +0x4f U+004f #LATIN CAPITAL LETTER O +0x50 U+0050 #LATIN CAPITAL LETTER P +0x51 U+0051 #LATIN CAPITAL LETTER Q +0x52 U+0052 #LATIN CAPITAL LETTER R +0x53 U+0053 #LATIN CAPITAL LETTER S +0x54 U+0054 #LATIN CAPITAL LETTER T +0x55 U+0055 #LATIN CAPITAL LETTER U +0x56 U+0056 #LATIN CAPITAL LETTER V +0x57 U+0057 #LATIN CAPITAL LETTER W +0x58 U+0058 #LATIN CAPITAL LETTER X +0x59 U+0059 #LATIN CAPITAL LETTER Y +0x5a U+005a #LATIN CAPITAL LETTER Z +0x5b U+005b #LEFT SQUARE BRACKET +0x5c U+005c #REVERSE SOLIDUS +0x5d U+005d #RIGHT SQUARE BRACKET +0x5e U+005e #CIRCUMFLEX ACCENT +0x5f U+005f #LOW LINE +0x60 U+0060 #GRAVE ACCENT +0x61 U+0061 #LATIN SMALL LETTER A +0x62 U+0062 #LATIN SMALL LETTER B +0x63 U+0063 #LATIN SMALL LETTER C +0x64 U+0064 #LATIN SMALL LETTER D +0x65 U+0065 #LATIN SMALL LETTER E +0x66 U+0066 #LATIN SMALL LETTER F +0x67 U+0067 #LATIN SMALL LETTER G +0x68 U+0068 #LATIN SMALL LETTER H +0x69 U+0069 #LATIN SMALL LETTER I +0x6a U+006a #LATIN SMALL LETTER J +0x6b U+006b #LATIN SMALL LETTER K +0x6c U+006c #LATIN SMALL LETTER L +0x6d U+006d #LATIN SMALL LETTER M +0x6e U+006e #LATIN SMALL LETTER N +0x6f U+006f #LATIN SMALL LETTER O +0x70 U+0070 #LATIN SMALL LETTER P +0x71 U+0071 #LATIN SMALL LETTER Q +0x72 U+0072 #LATIN SMALL LETTER R +0x73 U+0073 #LATIN SMALL LETTER S +0x74 U+0074 #LATIN SMALL LETTER T +0x75 U+0075 #LATIN SMALL LETTER U +0x76 U+0076 #LATIN SMALL LETTER V +0x77 U+0077 #LATIN SMALL LETTER W +0x78 U+0078 #LATIN SMALL LETTER X +0x79 U+0079 #LATIN SMALL LETTER Y +0x7a U+007a #LATIN SMALL LETTER Z +0x7b U+007b #LEFT CURLY BRACKET +0x7c U+007c #VERTICAL LINE +0x7d U+007d #RIGHT CURLY BRACKET +0x7e U+007e #TILDE +#0x7f U+007f #DELETE +0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS +0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE +0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS +0x85 U+016f #LATIN SMALL LETTER U WITH RING ABOVE +0x86 U+0107 #LATIN SMALL LETTER C WITH ACUTE +0x87 U+00e7 #LATIN SMALL LETTER C WITH CEDILLA +0x88 U+0142 #LATIN SMALL LETTER L WITH STROKE +0x89 U+00eb #LATIN SMALL LETTER E WITH DIAERESIS +0x8a U+0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0x8b U+0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE +0x8c U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x8d U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE +0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x8f U+0106 #LATIN CAPITAL LETTER C WITH ACUTE +0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x91 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE +0x92 U+013a #LATIN SMALL LETTER L WITH ACUTE +0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0x94 U+00f6 #LATIN SMALL LETTER O WITH DIAERESIS +0x95 U+013d #LATIN CAPITAL LETTER L WITH CARON +0x96 U+013e #LATIN SMALL LETTER L WITH CARON +0x97 U+015a #LATIN CAPITAL LETTER S WITH ACUTE +0x98 U+015b #LATIN SMALL LETTER S WITH ACUTE +0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b U+0164 #LATIN CAPITAL LETTER T WITH CARON +0x9c U+0165 #LATIN SMALL LETTER T WITH CARON +0x9d U+0141 #LATIN CAPITAL LETTER L WITH STROKE +0x9e U+00d7 #MULTIPLICATION SIGN +0x9f U+010d #LATIN SMALL LETTER C WITH CARON +0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK +0xa5 U+0105 #LATIN SMALL LETTER A WITH OGONEK +0xa6 U+017d #LATIN CAPITAL LETTER Z WITH CARON +0xa7 U+017e #LATIN SMALL LETTER Z WITH CARON +0xa8 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK +0xa9 U+0119 #LATIN SMALL LETTER E WITH OGONEK +0xaa U+00ac #NOT SIGN +0xab U+017a #LATIN SMALL LETTER Z WITH ACUTE +0xac U+010c #LATIN CAPITAL LETTER C WITH CARON +0xad U+015f #LATIN SMALL LETTER S WITH CEDILLA +0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xb7 U+011a #LATIN CAPITAL LETTER E WITH CARON +0xb8 U+015e #LATIN CAPITAL LETTER S WITH CEDILLA +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+017b #LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xbe U+017c #LATIN SMALL LETTER Z WITH DOT ABOVE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+0102 #LATIN CAPITAL LETTER A WITH BREVE +0xc7 U+0103 #LATIN SMALL LETTER A WITH BREVE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+00a4 #CURRENCY SIGN +0xd0 U+0111 #LATIN SMALL LETTER D WITH STROKE +0xd1 U+0110 #LATIN CAPITAL LETTER D WITH STROKE +0xd2 U+010e #LATIN CAPITAL LETTER D WITH CARON +0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS +0xd4 U+010f #LATIN SMALL LETTER D WITH CARON +0xd5 U+0147 #LATIN CAPITAL LETTER N WITH CARON +0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xd8 U+011b #LATIN SMALL LETTER E WITH CARON +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA +0xde U+016e #LATIN CAPITAL LETTER U WITH RING ABOVE +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe1 U+00df #LATIN SMALL LETTER SHARP S +0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xe3 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE +0xe4 U+0144 #LATIN SMALL LETTER N WITH ACUTE +0xe5 U+0148 #LATIN SMALL LETTER N WITH CARON +0xe6 U+0160 #LATIN CAPITAL LETTER S WITH CARON +0xe7 U+0161 #LATIN SMALL LETTER S WITH CARON +0xe8 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE +0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE +0xea U+0155 #LATIN SMALL LETTER R WITH ACUTE +0xeb U+0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE +0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE +0xee U+0163 #LATIN SMALL LETTER T WITH CEDILLA +0xef U+00b4 #ACUTE ACCENT +0xf0 U+00ad #SOFT HYPHEN +0xf1 U+02dd #DOUBLE ACUTE ACCENT +0xf2 U+02db #OGONEK +0xf3 U+02c7 #CARON +0xf4 U+02d8 #BREVE +0xf5 U+00a7 #SECTION SIGN +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+00b8 #CEDILLA +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+00a8 #DIAERESIS +0xfa U+02d9 #DOT ABOVE +0xfb U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xfc U+0158 #LATIN CAPITAL LETTER R WITH CARON +0xfd U+0159 #LATIN SMALL LETTER R WITH CARON +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE diff --git a/src/chrtrans/cp862_uni.tbl b/src/chrtrans/cp862_uni.tbl index 60d9692e..3d21c138 100644 --- a/src/chrtrans/cp862_uni.tbl +++ b/src/chrtrans/cp862_uni.tbl @@ -2,7 +2,7 @@ Mcp862 #Name as a Display Charset (used on Options screen). -OHebrew (cp862) +ODosHebrew (cp862) # Name: cp862_DOSHebrew to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp864_uni.tbl b/src/chrtrans/cp864_uni.tbl index 8411f8b7..d6e92431 100644 --- a/src/chrtrans/cp864_uni.tbl +++ b/src/chrtrans/cp864_uni.tbl @@ -2,7 +2,7 @@ Mcp864 #Name as a Display Charset (used on Options screen). -OArabic (cp864) +ODosArabic (cp864) # Name: cp864_DOSArabic to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp866_uni.tbl b/src/chrtrans/cp866_uni.tbl index 55ce0718..2d02ce1b 100644 --- a/src/chrtrans/cp866_uni.tbl +++ b/src/chrtrans/cp866_uni.tbl @@ -3,7 +3,7 @@ Mcp866 #Name as a Display Charset (used on Options screen) -OCyrillic (cp866) +ODosCyrillic (cp866) # # Name: cp866_DOSCyrillicRussian to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/cp869_uni.tbl b/src/chrtrans/cp869_uni.tbl index 1f418728..21cdeb95 100644 --- a/src/chrtrans/cp869_uni.tbl +++ b/src/chrtrans/cp869_uni.tbl @@ -2,7 +2,7 @@ Mcp869 #Name as a Display Charset (used on Options screen) -OGreek2 (cp869) +ODosGreek2 (cp869) # Name: cp869_DOSGreek2 to Unicode table # Unicode version: 2.0 diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index bd6b1b1c..66a63f76 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -1350,13 +1350,13 @@ U+208c:_= U+208d:( U+208e:) # Old euro currency sign glyph: -#U+20A0:CE +U+20A0:CE U+20a3:Ff U+20a4:Li U+20a7:Pt U+20a9:W= -# New euro currency sign glyph: -U+20AC:EUR +# New euro currency sign glyph ? +# U+20AC:EUR U+2103:oC U+2105:c/o U+2109:oF @@ -1754,7 +1754,6 @@ U+266e:Mx U+266f:# 0x58 U+2713 U+2717 # check marks -> x U+2720:-X -# CJK area: 0x20 U+3000 # ideographic space U+3001:,_ U+3002:._ @@ -2015,17 +2014,6 @@ U+3229:10c U+327f:KSC U+33c2:am U+33d8:pm -# -# -#There are four special ranges of characters that are represented only by -#their start and end characters <...> -# -# The CJK Ideographs Area (U+4E00 - U+9FFF) -# The Hangul Syllables Area (U+AC00 - U+D7A3) -# The Surrogates Area (U+D800 - U+DFFF) -# The Private Use Area (U+E000 - U+F8FF) -# -# U+fb00:ff U+fb01:fi U+fb02:fl @@ -2184,12 +2172,12 @@ U+001d:GS U+001e:RS U+001f:US U+007f:DT -# Most of these characters (80-9F) may be inflicted on us +U+0080:PA +U+0081:HO +# Most of these characters (82-9F) may be inflicted on us # by MS FrontPages which uses Unicode notation such as ™ # but there are no assigned letters in Unicode 128-159 range. # It is assumed in the code that those codepoints are from windows-1252. -#U+0080:PA -#U+0081:HO #U+0082:BH #U+0083:NH #U+0084:IN @@ -2201,10 +2189,10 @@ U+007f:DT #U+008a:VS #U+008b:PD #U+008c:PU -#U+008d:RI -#U+008e:SS2 -#U+008f:SS3 -#U+0090:DCS +U+008d:RI +U+008e:SS2 +U+008f:SS3 +U+0090:DCS #U+0091:P1 #U+0092:P2 #U+0093:TS @@ -2214,13 +2202,15 @@ U+007f:DT #U+0097:EG #U+0098:SS #U+0099:GC -#U+009a:SC +U+009a:SC #U+009b:CSI #U+009c:ST #U+009d:OC #U+009e:PM #U+009f:AC +# Characters in Private Use Area (e000-f8ff) do not have ussigned numbers. + # Let's try to show a question mark for character that cannot # be shown. U+fffd is used for invalid characters. # It works, but let's stick with UHHH representatiion. - FM diff --git a/src/chrtrans/iso01_uni.tbl b/src/chrtrans/iso01_uni.tbl index 5b697e02..f792164d 100644 --- a/src/chrtrans/iso01_uni.tbl +++ b/src/chrtrans/iso01_uni.tbl @@ -8,7 +8,7 @@ D0 Miso-8859-1 #Name as a Display Charset (used on Options screen) -OWestern (ISO-8859-1) +OISO Latin 1 # # Name: ISO 8859-1 (1987) to Unicode diff --git a/src/chrtrans/iso02_uni.tbl b/src/chrtrans/iso02_uni.tbl index 7fa0df92..af97bc55 100644 --- a/src/chrtrans/iso02_uni.tbl +++ b/src/chrtrans/iso02_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-2 #Name as a Display Charset (used on Options screen) -OEastern European (ISO-8859-2) +OISO Latin 2 # # Name: ISO 8859-2 (1987) to Unicode diff --git a/src/chrtrans/iso03_uni.tbl b/src/chrtrans/iso03_uni.tbl index a3c1f07a..bb8cd90f 100644 --- a/src/chrtrans/iso03_uni.tbl +++ b/src/chrtrans/iso03_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-3 #Name as a Display Charset (used on Options screen) -OLatin 3 (ISO-8859-3) +OISO Latin 3 # # Name: ISO 8859-3 (1988) to Unicode diff --git a/src/chrtrans/iso04_uni.tbl b/src/chrtrans/iso04_uni.tbl index 29be0be5..3f54afda 100644 --- a/src/chrtrans/iso04_uni.tbl +++ b/src/chrtrans/iso04_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-4 #Name as a Display Charset (used on Options screen) -OLatin 4 (ISO-8859-4) +OISO Latin 4 # # Name: ISO 8859-4 (1988) to Unicode diff --git a/src/chrtrans/iso05_uni.tbl b/src/chrtrans/iso05_uni.tbl index a715b64f..40cdc24d 100644 --- a/src/chrtrans/iso05_uni.tbl +++ b/src/chrtrans/iso05_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-5 #Name as a Display Charset (used on Options screen) -OCyrillic (ISO-8859-5) +OISO 8859-5 Cyrillic # # Name: ISO 8859-5 (1988) to Unicode diff --git a/src/chrtrans/iso06_uni.tbl b/src/chrtrans/iso06_uni.tbl index 549b592d..c9418864 100644 --- a/src/chrtrans/iso06_uni.tbl +++ b/src/chrtrans/iso06_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-6 #Name as a Display Charset (used on Options screen). -OArabic (ISO-8859-6) +OISO 8859-6 Arabic # # Name: ISO 8859-6 (1987) to Unicode diff --git a/src/chrtrans/iso07_uni.tbl b/src/chrtrans/iso07_uni.tbl index dffca758..368209bf 100644 --- a/src/chrtrans/iso07_uni.tbl +++ b/src/chrtrans/iso07_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-7 #Name as a Display Charset (used on Options screen) -OGreek (ISO-8859-7) +OISO 8859-7 Greek # # Name: ISO 8859-7 (1987) to Unicode diff --git a/src/chrtrans/iso08_uni.tbl b/src/chrtrans/iso08_uni.tbl index 050be29d..4d83f5c4 100644 --- a/src/chrtrans/iso08_uni.tbl +++ b/src/chrtrans/iso08_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-8 #Name as a Display Charset (used on Options screen). -OHebrew (ISO-8859-8) +OISO 8859-8 Hebrew # # Name: ISO 8859-8 (1988) to Unicode diff --git a/src/chrtrans/iso09_uni.tbl b/src/chrtrans/iso09_uni.tbl index 5abe799f..1b204835 100644 --- a/src/chrtrans/iso09_uni.tbl +++ b/src/chrtrans/iso09_uni.tbl @@ -2,7 +2,7 @@ Miso-8859-9 #Name as a Display Charset (used on Options screen) -OTurkish (ISO-8859-9) +OISO 8859-9 (Latin 5) # # Name: ISO 8859-9 (1989) to Unicode diff --git a/src/chrtrans/iso10_uni.tbl b/src/chrtrans/iso10_uni.tbl index ab8128f0..be3c02cd 100644 --- a/src/chrtrans/iso10_uni.tbl +++ b/src/chrtrans/iso10_uni.tbl @@ -10,7 +10,7 @@ D0 Miso-8859-10 #Name as a Display Charset (used on Options screen) -OISO-8859-10 +OISO 8859-10 0x20-0x7e idem #0x7f U+2302 diff --git a/src/chrtrans/koi8r_uni.tbl b/src/chrtrans/koi8r_uni.tbl index 69eef3c2..09e8743c 100644 --- a/src/chrtrans/koi8r_uni.tbl +++ b/src/chrtrans/koi8r_uni.tbl @@ -1,5 +1,5 @@ # Options screen name for this character set -OCyrillic (KOI8-R) +OKOI8-R Cyrillic # MIME name for this charset Mkoi8-r diff --git a/src/chrtrans/makefile.dos b/src/chrtrans/makefile.dos index 57c45dfc..6871b87a 100644 --- a/src/chrtrans/makefile.dos +++ b/src/chrtrans/makefile.dos @@ -13,9 +13,7 @@ CFLAGS = $(MCFLAGS) CC = gcc MCFLAGS = -O3 -DDOSPATH -DNO_TTYTYP \ --I. \ --I../../WWW/library/implement \ --I../../djgpp/tcplib/include \ +-I../../WWW/library/implement -I../../djgpp/tcplib/include \ -I../../djgpp/tcplib/include/tcp .SUFFIXES: .tbl @@ -83,7 +81,6 @@ cp1256_uni.h: cp1256_uni.tbl makeuctb.exe cp1257_uni.h: cp1257_uni.tbl makeuctb.exe cp437_uni.h: cp437_uni.tbl makeuctb.exe cp737_uni.h: cp737_uni.tbl makeuctb.exe -cp775_uni.h: cp775_uni.tbl makeuctb.exe cp850_uni.h: cp850_uni.tbl makeuctb.exe cp852_uni.h: cp852_uni.tbl makeuctb.exe cp862_uni.h: cp862_uni.tbl makeuctb.exe diff --git a/src/chrtrans/makefile.in b/src/chrtrans/makefile.in index 512b003d..7cb02699 100644 --- a/src/chrtrans/makefile.in +++ b/src/chrtrans/makefile.in @@ -107,7 +107,6 @@ cp1256_uni.h: $(srcdir)/cp1256_uni.tbl makeuctb$x cp1257_uni.h: $(srcdir)/cp1257_uni.tbl makeuctb$x cp437_uni.h: $(srcdir)/cp437_uni.tbl makeuctb$x cp737_uni.h: $(srcdir)/cp737_uni.tbl makeuctb$x -cp775_uni.h: $(srcdir)/cp775_uni.tbl makeuctb$x cp850_uni.h: $(srcdir)/cp850_uni.tbl makeuctb$x cp852_uni.h: $(srcdir)/cp852_uni.tbl makeuctb$x cp862_uni.h: $(srcdir)/cp862_uni.tbl makeuctb$x diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index cc721723..874d971f 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -16,7 +16,13 @@ * version 2, or at your option any later version. */ -#define DONT_USE_SOCKS5 +#ifdef NOTDEFINED +#include <stdio.h> +#include <stdlib.h> +#include <sysexits.h> +#include <string.h> +#include <ctype.h> +#else #include <HTUtils.h> #include <tcp.h> /* @@ -25,6 +31,7 @@ #ifdef exit #undef exit #endif /* exit */ +#endif /* NODEFINED */ #ifndef TOLOWER #define TOLOWER(c) (isupper((unsigned char)c) ? tolower((unsigned char)c) : (c)) @@ -40,16 +47,6 @@ */ typedef u16 unicode; -/* - * Since we're writing the formatted file to stdout, ensure that we flush - * everything before leaving, since some old (and a few not-so-old) platforms - * that do not implement POSIX 'exit()'. - */ -#define done(code) \ - fflush(stdout); \ - fflush(stderr); \ - exit(code) - PRIVATE void usage ARGS1( char *, argv0) { @@ -59,7 +56,7 @@ PRIVATE void usage ARGS1( argv0); fprintf(stderr, "Utility to convert .tbl into .h files for Lynx compilation.\n"); - done(EX_USAGE); + exit(EX_USAGE); } /* copied from HTString.c, not everybody has strncasecmp */ @@ -132,7 +129,7 @@ PRIVATE void addpair_str ARGS2( if (!themap_str.entries) { fprintf(stderr, "%s: Out of memory\n", tblname); - done(EX_DATAERR); + exit(EX_DATAERR); } } else { /* @@ -152,7 +149,7 @@ PRIVATE void addpair_str ARGS2( if (themap_str.entry_ct > 1999) { fprintf(stderr, "ERROR: Only 2000 unicode replacement strings permitted!\n"); - done(EX_DATAERR); + exit(EX_DATAERR); } themap_str.entries[themap_str.entry_ct].unicode = un; themap_str.entries[themap_str.entry_ct].replace_str = str; @@ -198,7 +195,7 @@ PRIVATE void addpair ARGS2( */ if (unicount[fp] > 254) { fprintf(stderr, "ERROR: Only 255 unicodes/glyph permitted!\n"); - done(EX_DATAERR); + exit(EX_DATAERR); } unitable[fp][unicount[fp]] = un; unicount[fp]++; @@ -236,7 +233,7 @@ PUBLIC int main ARGS2( ctbl = fopen(tblname = argv[1], "r"); if (!ctbl) { perror(tblname); - done(EX_NOINPUT); + exit(EX_NOINPUT); } } @@ -386,11 +383,11 @@ PUBLIC int main ARGS2( un0 = getunicode(&p); if (un0 < 0) { fprintf(stderr, "Bad input line: %s\n", buffer); - done(EX_DATAERR); + exit(EX_DATAERR); fprintf(stderr, "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n", tblname, fp0, fp1); - done(EX_DATAERR); + exit(EX_DATAERR); } un1 = un0; while (*p == ' ' || *p == '\t') { @@ -407,7 +404,7 @@ PUBLIC int main ARGS2( "%s: Bad Unicode range U+%x-U+%x\n", tblname, un0, un1); fprintf(stderr, "Bad input line: %s\n", buffer); - done(EX_DATAERR); + exit(EX_DATAERR); } while (*p == ' ' || *p == '\t') { p++; @@ -424,7 +421,7 @@ PUBLIC int main ARGS2( if (!(p1 = tbuf)) { fprintf(stderr, "%s: Out of memory\n", tblname); - done(EX_DATAERR); + exit(EX_DATAERR); } if (*p == '"') { /* @@ -484,7 +481,7 @@ PUBLIC int main ARGS2( fp0 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); - done(EX_DATAERR); + exit(EX_DATAERR); } p = p1; @@ -496,7 +493,7 @@ PUBLIC int main ARGS2( fp1 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); - done(EX_DATAERR); + exit(EX_DATAERR); } p = p1; } else { @@ -507,13 +504,13 @@ PUBLIC int main ARGS2( fprintf(stderr, "%s: Glyph number (0x%x) larger than font length\n", tblname, fp0); - done(EX_DATAERR); + exit(EX_DATAERR); } if (fp1 && (fp1 < fp0 || fp1 >= fontlen)) { fprintf(stderr, "%s: Bad end of range (0x%x)\n", tblname, fp1); - done(EX_DATAERR); + exit(EX_DATAERR); } if (fp1) { @@ -540,7 +537,7 @@ PUBLIC int main ARGS2( tblname); fprintf(stderr, " there should be a Unicode range.\n"); - done(EX_DATAERR); + exit(EX_DATAERR); } p++; un1 = getunicode(&p); @@ -548,7 +545,7 @@ PUBLIC int main ARGS2( fprintf(stderr, "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n", tblname, fp0, fp1); - done(EX_DATAERR); + exit(EX_DATAERR); } if (un1 - un0 != fp1 - fp0) { fprintf(stderr, @@ -557,7 +554,7 @@ PUBLIC int main ARGS2( fprintf(stderr, " as font position range 0x%x-0x%x\n", fp0, fp1); - done(EX_DATAERR); + exit(EX_DATAERR); } for (i = fp0; i <= fp1; i++) { addpair(i,un0-fp0+i); @@ -588,7 +585,7 @@ PUBLIC int main ARGS2( fprintf(stderr, "%s: Bad Unicode range 0x%x-0x%x\n", tblname, un0, un1); - done(EX_DATAERR); + exit(EX_DATAERR); } for (un0++; un0 <= un1; un0++) { addpair(fp0, un0); @@ -634,14 +631,12 @@ PUBLIC int main ARGS2( } else if (this_LYNXcharset[0] == '\0') { strncpy(this_LYNXcharset,this_MIMEcharset,UC_MAXLEN_LYNXCSNAME); } -/***** DO NOT produce trailing spaces! if ((i = strlen(this_LYNXcharset)) < UC_LEN_LYNXCSNAME) { for (; i < UC_LEN_LYNXCSNAME; i++) { this_LYNXcharset[i] = ' '; } this_LYNXcharset[i] = '\0'; } -*******/ #ifdef NOTDEFINED fprintf(stderr,"this_MIMEcharset: %s.\n",this_MIMEcharset); fprintf(stderr,"this_LYNXcharset: %s.\n",this_LYNXcharset); @@ -660,8 +655,8 @@ PUBLIC int main ARGS2( p++, i++) { id_append[i+1] = isalnum(*p) ? *p : '_'; } - id_append[i+1] = '\0'; } + id_append[i+1] = '\0'; fprintf(stderr, " (%s).\n", id_append); printf("\ @@ -765,5 +760,5 @@ dfont_replacedesc%s,%d,%d)\n", id_append, this_MIMEcharset, this_LYNXcharset, id_append, id_append, nuni, id_append, lowest_eight, RawOrEnc); - done(EX_OK); + exit(EX_OK); } diff --git a/src/chrtrans/utf8_uni.tbl b/src/chrtrans/utf8_uni.tbl index 9fc470ad..67ff3460 100644 --- a/src/chrtrans/utf8_uni.tbl +++ b/src/chrtrans/utf8_uni.tbl @@ -7,7 +7,7 @@ Mutf-8 #Name as a Display Charset (used on Options screen) -OUNICODE (UTF-8) +OUNICODE UTF-8 # Some kind of raw Unicode? # Use 6 for for really "raw" 16bit UCS-2, 7 for UTF-8, ... |