diff options
Diffstat (limited to 'src/chrtrans')
-rw-r--r-- | src/chrtrans/cp1250_uni.tbl | 41 | ||||
-rw-r--r-- | src/chrtrans/cp1252_uni.tbl | 28 | ||||
-rw-r--r-- | src/chrtrans/cp437_uni.tbl | 39 | ||||
-rw-r--r-- | src/chrtrans/cp737_uni.tbl | 24 | ||||
-rw-r--r-- | src/chrtrans/cp850_uni.tbl | 54 | ||||
-rw-r--r-- | src/chrtrans/cp852_uni.tbl | 61 | ||||
-rw-r--r-- | src/chrtrans/def7_uni.tbl | 213 | ||||
-rw-r--r-- | src/chrtrans/iso01_uni.tbl | 26 | ||||
-rw-r--r-- | src/chrtrans/iso02_uni.tbl | 40 | ||||
-rw-r--r-- | src/chrtrans/iso05_uni.tbl | 41 | ||||
-rw-r--r-- | src/chrtrans/iso07_uni.tbl | 61 | ||||
-rw-r--r-- | src/chrtrans/iso09_uni.tbl | 32 | ||||
-rw-r--r-- | src/chrtrans/koi8r_uni.tbl | 33 | ||||
-rw-r--r-- | src/chrtrans/mac_uni.tbl | 4 | ||||
-rw-r--r-- | src/chrtrans/makeuctb.c | 43 |
15 files changed, 492 insertions, 248 deletions
diff --git a/src/chrtrans/cp1250_uni.tbl b/src/chrtrans/cp1250_uni.tbl index 207add88..64ad83c4 100644 --- a/src/chrtrans/cp1250_uni.tbl +++ b/src/chrtrans/cp1250_uni.tbl @@ -26,6 +26,8 @@ C1250 # The entries are in cp1250 order # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw 0x20-0x7e idem # @@ -39,11 +41,11 @@ C1250 0x87 U+2021 #DOUBLE DAGGER 0x88 #UNDEFINED 0x89 U+2030 #PER MILLE SIGN -0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON +0x8A U+0160 U+0428 #LATIN CAPITAL LETTER S WITH CARON 0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x8C U+015A #LATIN CAPITAL LETTER S WITH ACUTE 0x8D U+0164 #LATIN CAPITAL LETTER T WITH CARON -0x8E U+017D #LATIN CAPITAL LETTER Z WITH CARON +0x8E U+017D U+0416 #LATIN CAPITAL LETTER Z WITH CARON 0x8F U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE 0x90 #UNDEFINED 0x91 U+2018 #LEFT SINGLE QUOTATION MARK @@ -55,21 +57,21 @@ C1250 0x97 U+2014 #EM DASH 0x98 #UNDEFINED 0x99 U+2122 #TRADE MARK SIGN -0x9A U+0161 #LATIN SMALL LETTER S WITH CARON +0x9A U+0161 U+0448 #LATIN SMALL LETTER S WITH CARON 0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x9C U+015B #LATIN SMALL LETTER S WITH ACUTE 0x9D U+0165 #LATIN SMALL LETTER T WITH CARON -0x9E U+017E #LATIN SMALL LETTER Z WITH CARON +0x9E U+017E U+0436 #LATIN SMALL LETTER Z WITH CARON 0x9F U+017A #LATIN SMALL LETTER Z WITH ACUTE 0xA0 U+00A0 #NO-BREAK SPACE -0xA1 U+02C7 #CARON -0xA2 U+02D8 #BREVE +0xA1 U+02C7 U+030c #CARON +0xA2 U+02D8 U+0306 #BREVE 0xA3 U+0141 #LATIN CAPITAL LETTER L WITH STROKE 0xA4 U+00A4 #CURRENCY SIGN 0xA5 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK 0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS +0xA8 U+00A8 U+0308 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN 0xAA U+015E #LATIN CAPITAL LETTER S WITH CEDILLA 0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -77,20 +79,20 @@ C1250 0xAD U+00AD #SOFT HYPHEN 0xAE U+00AE #REGISTERED SIGN 0xAF U+017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xB0 U+00B0 #DEGREE SIGN +0xB0 U+00B0 U+030a #DEGREE SIGN 0xB1 U+00B1 #PLUS-MINUS SIGN -0xB2 U+02DB #OGONEK +0xB2 U+02DB U+0328 #OGONEK 0xB3 U+0142 #LATIN SMALL LETTER L WITH STROKE 0xB4 U+00B4 #ACUTE ACCENT -0xB5 U+00B5 #MICRO SIGN +0xB5 U+00B5 U+03bc #MICRO SIGN 0xB6 U+00B6 #PILCROW SIGN 0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA +0xB8 U+00B8 U+0327 #CEDILLA 0xB9 U+0105 #LATIN SMALL LETTER A WITH OGONEK 0xBA U+015F #LATIN SMALL LETTER S WITH CEDILLA 0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0xBC U+013D #LATIN CAPITAL LETTER L WITH CARON -0xBD U+02DD #DOUBLE ACUTE ACCENT +0xBD U+02DD U+030b #DOUBLE ACUTE ACCENT 0xBE U+013E #LATIN SMALL LETTER L WITH CARON 0xBF U+017C #LATIN SMALL LETTER Z WITH DOT ABOVE 0xC0 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE @@ -101,7 +103,7 @@ C1250 0xC5 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE 0xC6 U+0106 #LATIN CAPITAL LETTER C WITH ACUTE 0xC7 U+00C7 #LATIN CAPITAL LETTER C WITH CEDILLA -0xC8 U+010C #LATIN CAPITAL LETTER C WITH CARON +0xC8 U+010C U+0427 # LATIN CAPITAL LETTER C WITH CARON 0xC9 U+00C9 #LATIN CAPITAL LETTER E WITH ACUTE 0xCA U+0118 #LATIN CAPITAL LETTER E WITH OGONEK 0xCB U+00CB #LATIN CAPITAL LETTER E WITH DIAERESIS @@ -133,7 +135,7 @@ C1250 0xE5 U+013A #LATIN SMALL LETTER L WITH ACUTE 0xE6 U+0107 #LATIN SMALL LETTER C WITH ACUTE 0xE7 U+00E7 #LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+010D #LATIN SMALL LETTER C WITH CARON +0xE8 U+010D U+02a7 U+0447 # LATIN SMALL LETTER C WITH CARON 0xE9 U+00E9 #LATIN SMALL LETTER E WITH ACUTE 0xEA U+0119 #LATIN SMALL LETTER E WITH OGONEK 0xEB U+00EB #LATIN SMALL LETTER E WITH DIAERESIS @@ -156,4 +158,13 @@ C1250 0xFC U+00FC #LATIN SMALL LETTER U WITH DIAERESIS 0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE 0xFE U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xFF U+02D9 #DOT ABOVE +0xFF U+02D9 U+0307 U+0387 #DOT ABOVE + +U+2218 " \260 " # RING OPERATOR +U+2219 " \225 " # BULLET OPERATOR +U+2297 "(\327)" # CIRCLED TIMES +U+2299 "(\267)" # CIRCLED DOT OPERATOR +U+229A "(\260)" # CIRCLED RING OPERATOR +U+22A0 "[\327]" # SQUARED TIMES +U+22A1 "[\267]" # SQUARED DOT OPERATOR +U+22C5 " \267 " # DOT OPERATOR diff --git a/src/chrtrans/cp1252_uni.tbl b/src/chrtrans/cp1252_uni.tbl index 7a9e149f..2365c9c5 100644 --- a/src/chrtrans/cp1252_uni.tbl +++ b/src/chrtrans/cp1252_uni.tbl @@ -30,6 +30,8 @@ C1252 # The entries are in cp1252 order # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw 0x20-0x7e idem # @@ -41,7 +43,7 @@ C1252 0x85 U+2026 #HORIZONTAL ELLIPSIS 0x86 U+2020 #DAGGER 0x87 U+2021 #DOUBLE DAGGER -0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x88 U+02C6 U+0302 #MODIFIER LETTER CIRCUMFLEX ACCENT 0x89 U+2030 #PER MILLE SIGN 0x8A U+0160 #LATIN CAPITAL LETTER S WITH CARON 0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK @@ -53,7 +55,7 @@ C1252 0x91 U+2018 #LEFT SINGLE QUOTATION MARK 0x92 U+2019 #RIGHT SINGLE QUOTATION MARK 0x93 U+201C #LEFT DOUBLE QUOTATION MARK -0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x94 U+201D U+02dd U+030b #RIGHT DOUBLE QUOTATION MARK 0x95 U+2022 #BULLET 0x96 U+2013 #EN DASH 0x97 U+2014 #EM DASH @@ -73,23 +75,23 @@ C1252 0xA5 U+00A5 #YEN SIGN 0xA6 U+00A6 #BROKEN BAR 0xA7 U+00A7 #SECTION SIGN -0xA8 U+00A8 #DIAERESIS +0xA8 U+00A8 U+0308 #DIAERESIS 0xA9 U+00A9 #COPYRIGHT SIGN 0xAA U+00AA #FEMININE ORDINAL INDICATOR 0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC #NOT SIGN 0xAD U+00AD #SOFT HYPHEN 0xAE U+00AE #REGISTERED SIGN -0xAF U+00AF #MACRON -0xB0 U+00B0 #DEGREE SIGN +0xAF U+00AF U+0304 #MACRON +0xB0 U+00B0 U+030a #DEGREE SIGN 0xB1 U+00B1 #PLUS-MINUS SIGN 0xB2 U+00B2 #SUPERSCRIPT TWO 0xB3 U+00B3 #SUPERSCRIPT THREE 0xB4 U+00B4 #ACUTE ACCENT -0xB5 U+00B5 #MICRO SIGN +0xB5 U+00B5 U+03bc #MICRO SIGN 0xB6 U+00B6 #PILCROW SIGN -0xB7 U+00B7 #MIDDLE DOT -0xB8 U+00B8 #CEDILLA +0xB7 U+00B7 U+0307 U+0387 U+2027 #MIDDLE DOT +0xB8 U+00B8 U+0327 #CEDILLA 0xB9 U+00B9 #SUPERSCRIPT ONE 0xBA U+00BA #MASCULINE ORDINAL INDICATOR 0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -161,3 +163,13 @@ C1252 0xFD U+00FD #LATIN SMALL LETTER Y WITH ACUTE 0xFE U+00FE #LATIN SMALL LETTER THORN 0xFF U+00FF #LATIN SMALL LETTER Y WITH DIAERESIS + +U+2218 " \260 " # RING OPERATOR +U+2219 " \225 " # BULLET OPERATOR +U+221b " ROOT\263 " +U+2297 "(\327)" # CIRCLED TIMES +U+2299 "(\267)" # CIRCLED DOT OPERATOR +U+229A "(\260)" # CIRCLED RING OPERATOR +U+22A0 "[\327]" # SQUARED TIMES +U+22A1 "[\267]" # SQUARED DOT OPERATOR +U+22C5 " \267 " # DOT OPERATOR diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl index 818cbab3..95755693 100644 --- a/src/chrtrans/cp437_uni.tbl +++ b/src/chrtrans/cp437_uni.tbl @@ -28,8 +28,8 @@ C437 # # The entries are in cp437_DosLatinUS order # -# some mapppings of greek letters to latin letters added, -# just for fun.. - KW +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw # ####################################### @@ -51,7 +51,7 @@ C437 0x8d U+00ec #LATIN SMALL LETTER I WITH GRAVE 0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS 0x8f U+00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE -0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x90 U+00c9 U+0388 #LATIN CAPITAL LETTER E WITH ACUTE 0x91 U+00e6 #LATIN SMALL LIGATURE AE 0x92 U+00c6 #LATIN CAPITAL LIGATURE AE 0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX @@ -61,7 +61,7 @@ C437 0x97 U+00f9 #LATIN SMALL LETTER U WITH GRAVE 0x98 U+00ff #LATIN SMALL LETTER Y WITH DIAERESIS 0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9a U+00dc U+03ab #LATIN CAPITAL LETTER U WITH DIAERESIS 0x9b U+00a2 #CENT SIGN 0x9c U+00a3 #POUND SIGN 0x9d U+00a5 #YEN SIGN @@ -135,40 +135,45 @@ C437 0xe1 U+00df U+03b2 #LATIN SMALL LETTER SHARP S 0xe2 U+0393 #GREEK CAPITAL LETTER GAMMA 0xe3 U+03c0 #GREEK SMALL LETTER PI -0xe4 U+03a3 #GREEK CAPITAL LETTER SIGMA +0xe4 U+03a3 U+2211 #GREEK CAPITAL LETTER SIGMA 0xe5 U+03c3 #GREEK SMALL LETTER SIGMA 0xe6 U+00b5 U+03bc #MICRO SIGN 0xe7 U+03c4 #GREEK SMALL LETTER TAU 0xe8 U+03a6 #GREEK CAPITAL LETTER PHI 0xe9 U+0398 U+03b8 #GREEK CAPITAL LETTER THETA -0xea U+03a9 #GREEK CAPITAL LETTER OMEGA +0xea U+03a9 U+2126 #GREEK CAPITAL LETTER OMEGA 0xeb U+03b4 #GREEK SMALL LETTER DELTA 0xec U+221e #INFINITY -0xed U+03c6 #GREEK SMALL LETTER PHI -0xee U+03b5 #GREEK SMALL LETTER EPSILON +0xed U+03c6 U+00f8 #GREEK SMALL LETTER PHI +0xee U+03b5 U+2208 U+220a #GREEK SMALL LETTER EPSILON 0xef U+2229 #INTERSECTION 0xf0 U+2261 #IDENTICAL TO 0xf1 U+00b1 #PLUS-MINUS SIGN -0xf2 U+2265 #GREATER-THAN OR EQUAL TO -0xf3 U+2264 #LESS-THAN OR EQUAL TO -0xf4 U+2320 #TOP HALF INTEGRAL +0xf2 U+2265 U+2267 #GREATER-THAN OR EQUAL TO +0xf3 U+2264 U+2266 #LESS-THAN OR EQUAL TO +0xf4 U+2320 U+0283 #TOP HALF INTEGRAL 0xf5 U+2321 #BOTTOM HALF INTEGRAL 0xf6 U+00f7 #DIVISION SIGN 0xf7 U+2248 #ALMOST EQUAL TO -0xf8 U+00b0 #DEGREE SIGN -0xf9 U+2219 #BULLET OPERATOR -0xfa U+00b7 #MIDDLE DOT +0xf8 U+00b0 U+030a #DEGREE SIGN +0xf9 U+2219 U+0307 U+0387 #BULLET OPERATOR +0xfa U+00b7 U+2027 #MIDDLE DOT 0xfb U+221a #SQUARE ROOT 0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N 0xfd U+00b2 #SUPERSCRIPT TWO 0xfe U+25a0 #BLACK SQUARE 0xff U+00a0 #NO-BREAK SPACE -U+03ac:a' U+03ad "\356'" #:î' U+03ae:h' -U+03af:i' -U+03cc:o' U+03cd:u' U+03ce:w' +U+2209 " !\356 " +U+221b " 3\373" +U+221c " 4\373" +U+2262 " !\360" +U+2299 "(\372)" +U+229a "(\370)" +U+22a1 "[\372]" +U+02a7 "t\364" diff --git a/src/chrtrans/cp737_uni.tbl b/src/chrtrans/cp737_uni.tbl index 6beee2c4..710bd288 100644 --- a/src/chrtrans/cp737_uni.tbl +++ b/src/chrtrans/cp737_uni.tbl @@ -25,6 +25,8 @@ C737 # The entries are in cp737_DOSGreek order # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw 0x20-0x7f idem # @@ -54,16 +56,16 @@ C737 0x97 U+03a9 #GREEK CAPITAL LETTER OMEGA 0x98 U+03b1 #GREEK SMALL LETTER ALPHA 0x99 U+03b2 #GREEK SMALL LETTER BETA -0x9a U+03b3 #GREEK SMALL LETTER GAMMA +0x9a U+03b3 U+0263 #GREEK SMALL LETTER GAMMA 0x9b U+03b4 #GREEK SMALL LETTER DELTA 0x9c U+03b5 #GREEK SMALL LETTER EPSILON 0x9d U+03b6 #GREEK SMALL LETTER ZETA 0x9e U+03b7 #GREEK SMALL LETTER ETA 0x9f U+03b8 #GREEK SMALL LETTER THETA -0xa0 U+03b9 #GREEK SMALL LETTER IOTA +0xa0 U+03b9 U+0131 #GREEK SMALL LETTER IOTA 0xa1 U+03ba #GREEK SMALL LETTER KAPPA 0xa2 U+03bb #GREEK SMALL LETTER LAMDA -0xa3 U+03bc #GREEK SMALL LETTER MU +0xa3 U+03bc U+00b5 #GREEK SMALL LETTER MU 0xa4 U+03bd #GREEK SMALL LETTER NU 0xa5 U+03be #GREEK SMALL LETTER XI 0xa6 U+03bf #GREEK SMALL LETTER OMICRON @@ -72,7 +74,7 @@ C737 0xa9 U+03c3 #GREEK SMALL LETTER SIGMA 0xaa U+03c2 #GREEK SMALL LETTER FINAL SIGMA 0xab U+03c4 #GREEK SMALL LETTER TAU -0xac U+03c5 #GREEK SMALL LETTER UPSILON +0xac U+03c5 U+028a #GREEK SMALL LETTER UPSILON 0xad U+03c6 #GREEK SMALL LETTER PHI 0xae U+03c7 #GREEK SMALL LETTER CHI 0xaf U+03c8 #GREEK SMALL LETTER PSI @@ -132,7 +134,7 @@ C737 0xe5 U+03af #GREEK SMALL LETTER IOTA WITH TONOS 0xe6 U+03cc #GREEK SMALL LETTER OMICRON WITH TONOS 0xe7 U+03cd #GREEK SMALL LETTER UPSILON WITH TONOS -0xe8 U+03cb #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xe8 U+03cb U+00fc #GREEK SMALL LETTER UPSILON WITH DIALYTIKA 0xe9 U+03ce #GREEK SMALL LETTER OMEGA WITH TONOS 0xea U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS 0xeb U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS @@ -149,10 +151,20 @@ C737 0xf6 U+00f7 #DIVISION SIGN 0xf7 U+2248 #ALMOST EQUAL TO 0xf8 U+00b0 #DEGREE SIGN -0xf9 U+2219 #BULLET OPERATOR +0xf9 U+2219 U+0307 U+0387 #BULLET OPERATOR 0xfa U+00b7 #MIDDLE DOT 0xfb U+221a #SQUARE ROOT 0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N 0xfd U+00b2 #SUPERSCRIPT TWO 0xfe U+25a0 #BLACK SQUARE 0xff U+00a0 #NO-BREAK SPACE + +U+2209 " !\234 " +U+2218 " \370 " # RING OPERATOR +U+221b " 3\373" +U+221c " 4\373" +U+2299 "(\372)" +U+229a "(\370)" +U+22a1 "[\372]" +U+02a4 "d\235" +U+2249 "!\367" diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl index bc44cde8..91fe44ee 100644 --- a/src/chrtrans/cp850_uni.tbl +++ b/src/chrtrans/cp850_uni.tbl @@ -31,12 +31,14 @@ C850 # The entries are in cp850_DOSLatin1 order # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw 0x20-0x7e idem # 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA -0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS -0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE +0x81 U+00fc U+03cb #LATIN SMALL LETTER U WITH DIAERESIS +0x82 U+00e9 U+03ad #LATIN SMALL LETTER E WITH ACUTE 0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX 0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS 0x85 U+00e0 #LATIN SMALL LETTER A WITH GRAVE @@ -45,12 +47,12 @@ C850 0x88 U+00ea #LATIN SMALL LETTER E WITH CIRCUMFLEX 0x89 U+00eb #LATIN SMALL LETTER E WITH DIAERESIS 0x8a U+00e8 #LATIN SMALL LETTER E WITH GRAVE -0x8b U+00ef #LATIN SMALL LETTER I WITH DIAERESIS +0x8b U+00ef U+03ca #LATIN SMALL LETTER I WITH DIAERESIS 0x8c U+00ee #LATIN SMALL LETTER I WITH CIRCUMFLEX 0x8d U+00ec #LATIN SMALL LETTER I WITH GRAVE 0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS 0x8f U+00c5 #LATIN CAPITAL LETTER A WITH RING ABOVE -0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x90 U+00c9 U+0388 #LATIN CAPITAL LETTER E WITH ACUTE 0x91 U+00e6 #LATIN SMALL LIGATURE AE 0x92 U+00c6 #LATIN CAPITAL LIGATURE AE 0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX @@ -60,16 +62,16 @@ C850 0x97 U+00f9 #LATIN SMALL LETTER U WITH GRAVE 0x98 U+00ff #LATIN SMALL LETTER Y WITH DIAERESIS 0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9a U+00dc U+03ab #LATIN CAPITAL LETTER U WITH DIAERESIS 0x9b U+00f8 #LATIN SMALL LETTER O WITH STROKE 0x9c U+00a3 #POUND SIGN 0x9d U+00d8 #LATIN CAPITAL LETTER O WITH STROKE 0x9e U+00d7 #MULTIPLICATION SIGN 0x9f U+0192 #LATIN SMALL LETTER F WITH HOOK -0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE -0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE -0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE -0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xa0 U+00e1 U+03ac #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed U+03af #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 U+03cc #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa U+03cd #LATIN SMALL LETTER U WITH ACUTE 0xa4 U+00f1 #LATIN SMALL LETTER N WITH TILDE 0xa5 U+00d1 #LATIN CAPITAL LETTER N WITH TILDE 0xa6 U+00aa #FEMININE ORDINAL INDICATOR @@ -87,7 +89,7 @@ C850 0xb2 U+2593 #DARK SHADE 0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL 0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT -0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb5 U+00c1 U+0386 #LATIN CAPITAL LETTER A WITH ACUTE 0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0xb7 U+00c0 #LATIN CAPITAL LETTER A WITH GRAVE 0xb8 U+00a9 #COPYRIGHT SIGN @@ -119,10 +121,10 @@ C850 0xd2 U+00ca #LATIN CAPITAL LETTER E WITH CIRCUMFLEX 0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS 0xd4 U+00c8 #LATIN CAPITAL LETTER E WITH GRAVE -0xd5 U+0131 #LATIN SMALL LETTER DOTLESS I -0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd5 U+0131 U+03b9 #LATIN SMALL LETTER DOTLESS I +0xd6 U+00cd U+038a #LATIN CAPITAL LETTER I WITH ACUTE 0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX -0xd8 U+00cf #LATIN CAPITAL LETTER I WITH DIAERESIS +0xd8 U+00cf U+03aa #LATIN CAPITAL LETTER I WITH DIAERESIS 0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT 0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT 0xdb U+2588 #FULL BLOCK @@ -130,13 +132,13 @@ C850 0xdd U+00a6 #BROKEN BAR 0xde U+00cc #LATIN CAPITAL LETTER I WITH GRAVE 0xdf U+2580 #UPPER HALF BLOCK -0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe0 U+00d3 U+038c #LATIN CAPITAL LETTER O WITH ACUTE 0xe1 U+00df #LATIN SMALL LETTER SHARP S 0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0xe3 U+00d2 #LATIN CAPITAL LETTER O WITH GRAVE 0xe4 U+00f5 #LATIN SMALL LETTER O WITH TILDE 0xe5 U+00d5 #LATIN CAPITAL LETTER O WITH TILDE -0xe6 U+00b5 #MICRO SIGN +0xe6 U+00b5 U+03bc #MICRO SIGN 0xe7 U+00fe #LATIN SMALL LETTER THORN 0xe8 U+00de #LATIN CAPITAL LETTER THORN 0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE @@ -144,22 +146,30 @@ C850 0xeb U+00d9 #LATIN CAPITAL LETTER U WITH GRAVE 0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE 0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE -0xee U+00af #MACRON -0xef U+00b4 #ACUTE ACCENT +0xee U+00af U+0304 #MACRON +0xef U+00b4 U+0301 #ACUTE ACCENT 0xf0 U+00ad #SOFT HYPHEN 0xf1 U+00b1 #PLUS-MINUS SIGN -0xf2 U+2017 #DOUBLE LOW LINE +0xf2 U+2017 U+0333 #DOUBLE LOW LINE 0xf3 U+00be #VULGAR FRACTION THREE QUARTERS 0xf4 U+00b6 #PILCROW SIGN 0xf5 U+00a7 #SECTION SIGN 0xf6 U+00f7 #DIVISION SIGN -0xf7 U+00b8 #CEDILLA -0xf8 U+00b0 #DEGREE SIGN -0xf9 U+00a8 #DIAERESIS -0xfa U+00b7 #MIDDLE DOT +0xf7 U+00b8 U+0327 #CEDILLA +0xf8 U+00b0 U+030a #DEGREE SIGN +0xf9 U+00a8 U+0308 #DIAERESIS +0xfa U+00b7 U+0307 U+0387 U+2027 #MIDDLE DOT 0xfb U+00b9 #SUPERSCRIPT ONE 0xfc U+00b3 #SUPERSCRIPT THREE 0xfd U+00b2 #SUPERSCRIPT TWO 0xfe U+25a0 #BLACK SQUARE 0xff U+00a0 #NO-BREAK SPACE +U+2218 " \370 " # RING OPERATOR +U+221b " ROOT\374 " +U+2297 "(\236)" # CIRCLED TIMES +U+2299 "(\372)" # CIRCLED DOT OPERATOR +U+229A "(\370)" # CIRCLED RING OPERATOR +U+22A0 "[\236]" # SQUARED TIMES +U+22A1 "[\372]" # SQUARED DOT OPERATOR +U+22C5 " \372 " # DOT OPERATOR diff --git a/src/chrtrans/cp852_uni.tbl b/src/chrtrans/cp852_uni.tbl index fec2ecf4..c4ac349b 100644 --- a/src/chrtrans/cp852_uni.tbl +++ b/src/chrtrans/cp852_uni.tbl @@ -25,12 +25,14 @@ C852 # The entries are in cp852_DOSLatin2 order # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw 0x20-0x7e idem # 0x80 U+00c7 #LATIN CAPITAL LETTER C WITH CEDILLA -0x81 U+00fc #LATIN SMALL LETTER U WITH DIAERESIS -0x82 U+00e9 #LATIN SMALL LETTER E WITH ACUTE +0x81 U+00fc U+03cb #LATIN SMALL LETTER U WITH DIAERESIS +0x82 U+00e9 U+03ad #LATIN SMALL LETTER E WITH ACUTE 0x83 U+00e2 #LATIN SMALL LETTER A WITH CIRCUMFLEX 0x84 U+00e4 #LATIN SMALL LETTER A WITH DIAERESIS 0x85 U+016f #LATIN SMALL LETTER U WITH RING ABOVE @@ -44,7 +46,7 @@ C852 0x8d U+0179 #LATIN CAPITAL LETTER Z WITH ACUTE 0x8e U+00c4 #LATIN CAPITAL LETTER A WITH DIAERESIS 0x8f U+0106 #LATIN CAPITAL LETTER C WITH ACUTE -0x90 U+00c9 #LATIN CAPITAL LETTER E WITH ACUTE +0x90 U+00c9 U+0388 #LATIN CAPITAL LETTER E WITH ACUTE 0x91 U+0139 #LATIN CAPITAL LETTER L WITH ACUTE 0x92 U+013a #LATIN SMALL LETTER L WITH ACUTE 0x93 U+00f4 #LATIN SMALL LETTER O WITH CIRCUMFLEX @@ -54,25 +56,25 @@ C852 0x97 U+015a #LATIN CAPITAL LETTER S WITH ACUTE 0x98 U+015b #LATIN SMALL LETTER S WITH ACUTE 0x99 U+00d6 #LATIN CAPITAL LETTER O WITH DIAERESIS -0x9a U+00dc #LATIN CAPITAL LETTER U WITH DIAERESIS +0x9a U+00dc U+03ab #LATIN CAPITAL LETTER U WITH DIAERESIS 0x9b U+0164 #LATIN CAPITAL LETTER T WITH CARON 0x9c U+0165 #LATIN SMALL LETTER T WITH CARON 0x9d U+0141 #LATIN CAPITAL LETTER L WITH STROKE 0x9e U+00d7 #MULTIPLICATION SIGN -0x9f U+010d #LATIN SMALL LETTER C WITH CARON -0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE -0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE -0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE -0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0x9f U+010d U+02a7 U+0447 #LATIN SMALL LETTER C WITH CARON +0xa0 U+00e1 U+03ac #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed U+03af #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 U+03cc #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa U+03cd #LATIN SMALL LETTER U WITH ACUTE 0xa4 U+0104 #LATIN CAPITAL LETTER A WITH OGONEK 0xa5 U+0105 #LATIN SMALL LETTER A WITH OGONEK -0xa6 U+017d #LATIN CAPITAL LETTER Z WITH CARON -0xa7 U+017e #LATIN SMALL LETTER Z WITH CARON +0xa6 U+017d U+0416 #LATIN CAPITAL LETTER Z WITH CARON +0xa7 U+017e U+0436 #LATIN SMALL LETTER Z WITH CARON 0xa8 U+0118 #LATIN CAPITAL LETTER E WITH OGONEK 0xa9 U+0119 #LATIN SMALL LETTER E WITH OGONEK 0xaa U+00ac #NOT SIGN 0xab U+017a #LATIN SMALL LETTER Z WITH ACUTE -0xac U+010c #LATIN CAPITAL LETTER C WITH CARON +0xac U+010c U+0427 #LATIN CAPITAL LETTER C WITH CARON 0xad U+015f #LATIN SMALL LETTER S WITH CEDILLA 0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -81,7 +83,7 @@ C852 0xb2 U+2593 #DARK SHADE 0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL 0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT -0xb5 U+00c1 #LATIN CAPITAL LETTER A WITH ACUTE +0xb5 U+00c1 U+0386 #LATIN CAPITAL LETTER A WITH ACUTE 0xb6 U+00c2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX 0xb7 U+011a #LATIN CAPITAL LETTER E WITH CARON 0xb8 U+015e #LATIN CAPITAL LETTER S WITH CEDILLA @@ -114,7 +116,7 @@ C852 0xd3 U+00cb #LATIN CAPITAL LETTER E WITH DIAERESIS 0xd4 U+010f #LATIN SMALL LETTER D WITH CARON 0xd5 U+0147 #LATIN CAPITAL LETTER N WITH CARON -0xd6 U+00cd #LATIN CAPITAL LETTER I WITH ACUTE +0xd6 U+00cd U+038a #LATIN CAPITAL LETTER I WITH ACUTE 0xd7 U+00ce #LATIN CAPITAL LETTER I WITH CIRCUMFLEX 0xd8 U+011b #LATIN SMALL LETTER E WITH CARON 0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT @@ -124,14 +126,14 @@ C852 0xdd U+0162 #LATIN CAPITAL LETTER T WITH CEDILLA 0xde U+016e #LATIN CAPITAL LETTER U WITH RING ABOVE 0xdf U+2580 #UPPER HALF BLOCK -0xe0 U+00d3 #LATIN CAPITAL LETTER O WITH ACUTE +0xe0 U+00d3 U+038c #LATIN CAPITAL LETTER O WITH ACUTE 0xe1 U+00df #LATIN SMALL LETTER SHARP S 0xe2 U+00d4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0xe3 U+0143 #LATIN CAPITAL LETTER N WITH ACUTE 0xe4 U+0144 #LATIN SMALL LETTER N WITH ACUTE 0xe5 U+0148 #LATIN SMALL LETTER N WITH CARON -0xe6 U+0160 #LATIN CAPITAL LETTER S WITH CARON -0xe7 U+0161 #LATIN SMALL LETTER S WITH CARON +0xe6 U+0160 U+0428 #LATIN CAPITAL LETTER S WITH CARON +0xe7 U+0161 U+0448 #LATIN SMALL LETTER S WITH CARON 0xe8 U+0154 #LATIN CAPITAL LETTER R WITH ACUTE 0xe9 U+00da #LATIN CAPITAL LETTER U WITH ACUTE 0xea U+0155 #LATIN SMALL LETTER R WITH ACUTE @@ -139,21 +141,28 @@ C852 0xec U+00fd #LATIN SMALL LETTER Y WITH ACUTE 0xed U+00dd #LATIN CAPITAL LETTER Y WITH ACUTE 0xee U+0163 #LATIN SMALL LETTER T WITH CEDILLA -0xef U+00b4 #ACUTE ACCENT +0xef U+00b4 U+0301 #ACUTE ACCENT 0xf0 U+00ad #SOFT HYPHEN -0xf1 U+02dd #DOUBLE ACUTE ACCENT -0xf2 U+02db #OGONEK -0xf3 U+02c7 #CARON -0xf4 U+02d8 #BREVE +0xf1 U+02dd U+030b #DOUBLE ACUTE ACCENT +0xf2 U+02db U+0328 #OGONEK +0xf3 U+02c7 U+030c #CARON +0xf4 U+02d8 U+0306 #BREVE 0xf5 U+00a7 #SECTION SIGN 0xf6 U+00f7 #DIVISION SIGN -0xf7 U+00b8 #CEDILLA -0xf8 U+00b0 #DEGREE SIGN -0xf9 U+00a8 #DIAERESIS -0xfa U+02d9 #DOT ABOVE +0xf7 U+00b8 U+0327 #CEDILLA +0xf8 U+00b0 U+030a #DEGREE SIGN +0xf9 U+00a8 U+0308 #DIAERESIS +0xfa U+02d9 U+0307 U+0387 #DOT ABOVE 0xfb U+0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE 0xfc U+0158 #LATIN CAPITAL LETTER R WITH CARON 0xfd U+0159 #LATIN SMALL LETTER R WITH CARON 0xfe U+25a0 #BLACK SQUARE 0xff U+00a0 #NO-BREAK SPACE +U+2218 " \370 " # RING OPERATOR +U+2297 "(\236)" # CIRCLED TIMES +U+2299 "(\372)" # CIRCLED DOT OPERATOR +U+229A "(\370)" # CIRCLED RING OPERATOR +U+22A0 "[\236]" # SQUARED TIMES +U+22A1 "[\372]" # SQUARED DOT OPERATOR +U+22C5 " \372 " # DOT OPERATOR diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index 39dc165d..ffc217fe 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -67,7 +67,7 @@ U+00bd: 1/2 U+00be: 3/4 U+00bf:? 0x41 U+00c0-U+00c3 -U+00c4:A: +U+00c4 "Ae" # Ä, not the best choice for some languages. U+00c5:AA U+00c6:AE U+00c7:C, @@ -76,17 +76,17 @@ U+00c7:C, U+00d0:D- 0x4e U+00d1 0x4f U+00d2-U+00d5 -U+00d6:O: +U+00d6 "Oe" # Ö, not the best choice for some languages. U+00d7: * U+00d8:O/ 0x55 U+00d9-U+00db -U+00dc:U: +U+00dc "Ue" # Ü, not the best choice for some languages. 0x59 U+00dd U+00de:TH U+00df:ss U+00e0:`a 0x61 U+00e1-U+00e3 -U+00e4:a: +U+00e4 "ae" # ä, not the best choice for some languages. U+00e5:aa U+00e6:ae U+00e7:c, @@ -96,11 +96,11 @@ U+00ec:`i U+00f0:d- 0x6e U+00f1 0x6f U+00f2-U+00f5 -U+00f6:o: +U+00f6 "oe" # ö, not the best choice for some languages. U+00f7:-: U+00f8:o/ 0x75 U+00f9-U+00fb -U+00fc:u: +U+00fc "ue" # ü, not the best choice for some languages. 0x79 U+00fd U+00fe:th 0x79 U+00ff @@ -165,7 +165,7 @@ U+0171:u" 0x77 U+0175 0x59 U+0176 U+0178 0x79 U+0177 -0x5a U+0179 U+017b U+017d +0x5a U+0179 U+017b U+017d U+021d 0x7a U+017a U+017c U+017e U+017f:s1 U+0187:C2 @@ -314,7 +314,7 @@ U+028d:w<vls> U+028e:l^ U+028f:I. U+0290:z. -U+0292:ed +U+0292:Z 0x3f U+0294 # LATIN SMALL LETTER GLOTTAL STOP -> ? U+0295:H<vcd> U+0296:l! @@ -326,6 +326,9 @@ U+029b:G` U+029e:k! 0x4c U+029F # LATIN LETTER SMALL CAPITAL L U+02a0:q` +U+02a4:d3 +U+02a6:ts +U+02a7:tS U+02b0:<h> U+02b1:<?> 0x3b U+02b2 U+0321 @@ -335,23 +338,50 @@ U+02bb:;S 0x60 U+02bc U+02c6:^ U+02c7:'< +U+02c8:| U+02c9:1- U+02cb:1! 0x3a U+02d0 +U+02d1 ":\\" +0x2b U+02d6 +0x2d U+02d7 U+02d8:'( U+02d9:'. U+02da:'0 U+02db:'; U+02dc:~ U+02dd:'" +U+02e5:_T +U+02e6:_H +U+02e7:_M +U+02e8:_L +U+02e9:_B +U+02ec:_v +U+02ee:'' +0x60 U+0300 +0x27 U+0301 +0x5e U+0302 0x7e U+0303 U+0334 +U+030b:'' +0x7c U+030d +U+030e:|| +U+030f:`` 0x2e U+0322 U+0323 U+0324:<?> U+0325:<o> +0x2c U+0326 U+0327 0x2d U+0329 0x5b U+032a U+032b:<w> U+0334:<H> +0x2f U+0337 U+0338 +U+0340:` +U+0341:' +U+0342:~ +U+0344:'% +U+0345:j3 +U+0347:= +U+0360:~~ U+0374:' U+0375:, U+037a:j3 @@ -359,12 +389,12 @@ U+037e:?% U+0384:'* U+0385:'% # Greek letters -U+0386:A% +U+0386:A' U+0387:.* -U+0388:E% +U+0388:E' U+0389:Y% -U+038a:I% -U+038c:O% +U+038a:I' +U+038c:O' U+038e:U% U+038f:W% U+0390:i3 @@ -391,13 +421,13 @@ U+03a5:U U+03a6:F U+03a7:X U+03a8:Q -U+03a9:W +U+03a9:W* U+03aa:J U+03ab:V* -U+03ac:a% -U+03ad:e% +U+03ac:a' +U+03ad:e' U+03ae:y% -U+03af:i% +U+03af:i' U+03b0:u3 U+03b1:a U+03b2:b @@ -426,7 +456,7 @@ U+03c8:q U+03c9:w U+03ca:j U+03cb:v* -U+03cc:o% +U+03cc:o' U+03cd:u% U+03ce:w% # Greek symbols @@ -435,6 +465,7 @@ U+03d1 "theta " U+03d2 "upsi " U+03d5 "phi " U+03d6 "pi " +U+03d7:k. U+03da:T3 U+03db:t3 U+03dc:M3 @@ -445,6 +476,7 @@ U+03e0:P3 U+03e1:p3 U+03f0 "kappa " U+03f1 "rho " +U+03f3:J U+03f4:'% U+03f5:j3 # Cyrillic capital letters @@ -555,7 +587,8 @@ U+0480:C3 U+0481:c3 U+0490:G3 U+0491:g3 - +U+04d4:AE +U+04d5:ae # These may make Yiddish slightly more readable, until we have # something better. @@ -1272,14 +1305,30 @@ U+1ef6:Y2 U+1ef7:y2 U+1ef8:Y? U+1ef9:y? -U+1f00:;' -U+1f01:,' -U+1f02:;! -U+1f03:,! -U+1f04:?; -U+1f05:?, -U+1f06:!: -U+1f07:?: +0x61 U+1f00 +U+1f01:ha +U+1f02:`a +U+1f03:h`a +U+1f04:a' +U+1f05:ha' +U+1f06:a~ +U+1f07:ha~ +0x41 U+1f08 +U+1f09:hA +U+1f0a:`A +U+1f0b:h`A +U+1f0c:A' +U+1f0d:hA' +U+1f0e:A~ +U+1f0f:hA~ +U+1f11:he +U+1f19:hE +U+1f31:hi +U+1f39:hI +U+1f41:ho +U+1f49:hO +U+1f51:hu +U+1f59:hU U+1fbf:,, U+1fc0:?* U+1fc1:?: @@ -1289,6 +1338,8 @@ U+1fcf:?, U+1fdd:;! U+1fde:;' U+1fdf:?; +U+1fe5:rh +U+1fec:Rh U+1fed:!: U+1fef:!* U+1ffe:;; @@ -1299,7 +1350,7 @@ U+2003 " " U+200e:(->) U+200f:(<-) U+200a: -0x2d U+2010 U+2013 U+2015 # hyphen-like +0x2d U+2010 U+2011 U+2013 U+2015 # hyphen-like U+2014 "--" U+2016:|| U+2017:=2 @@ -1309,8 +1360,12 @@ U+2017:=2 U+2020:/- U+2021:/= U+2022 " o " +0x2e U+2024 U+2025:.. U+2026:... +U+2027:. +U+2028 "\015" +U+2029 "\015\012" # Dont wanna see these: # POP DIRECTIONAL FORMATTING 202C @@ -1318,6 +1373,7 @@ U+202c: # LEFT-TO-RIGHT OVERRIDE 202D U+202d: +U+202f "" U+2030: 0/00 U+2032:' U+2033:'' @@ -1333,6 +1389,8 @@ U+203c:!! U+203e:'- 0x2d U+2043 # HYPHEN BULLET ? U+2044:/ +U+2048:?! +U+2049:!? # end of General punctuation. U+2070:^0 U+2074:^4 @@ -1370,22 +1428,31 @@ U+20a7:Pt U+20a9:W= # New euro currency sign glyph: U+20AC:EUR +U+2100:a/c +U+2101:a/s U+2103:oC U+2105:c/o +U+2106:c/u U+2109:oF +0x67 U+210a +0x68 U+210e +U+210f "\134hbar " U+2111:Im +U+2113:l U+2116:No. U+2117:PO U+2118:P U+211C:Re U+211e:Rx U+2120:(SM) +U+2121:TEL # TRADE MARK SIGN: U+2122:(TM) U+2126:Ohm 0x4b U+212A # Kelvin sign - K U+212b:Ang. U+212E:est. +0x6f U+2134 U+2135 "Aleph " U+2136 "Bet " U+2137 "Gimel " @@ -1402,6 +1469,7 @@ U+215b: 1/8 U+215c: 3/8 U+215d: 5/8 U+215e: 7/8 +U+215f: 1/ U+2160:I U+2161:II U+2162:III @@ -1459,25 +1527,34 @@ U+2200:FA U+2202:\partial U+2203:TE U+2205:{} -U+2206:decr. +U+2206:Delta U+2207:Nabla U+2208:(- U+2209:!(- +U+220a:(- U+220b:-) +U+220c:!-) +U+220d:-) +U+220e " qed" U+220f:\prod U+2211:\sum U+2212: - U+2213:-/+ U+2214:.+ +0x2f U+2215 +U+2216 " - " U+2217:* U+2218:Ob U+2219:sb U+221a " SQRT " +U+221b " ROOT3 " +U+221c " ROOT4 " U+221d:0( U+221e:infty U+221f:-L U+2220:-V U+2225:PP +U+2226 " !PP " U+2227:AND U+2228:OR U+2229:(U @@ -1495,30 +1572,69 @@ U+2243:?- U+2245:?= # ALMOST EQUAL TO: U+2248:~= +U+2249 " !~= " U+224c:=? U+2253:HI +U+2254::= +U+2255:=: U+2260:!= U+2261:=3 +U+2262 " !=3 " U+2264:=< U+2265:>= +U+2266:.LE. +U+2267:.GE. +U+2268:.LT.NOT.EQ. +U+2269:.GT.NOT.EQ. U+226a:<< U+226b:>> U+226e:!< U+226f:!> +U+2276 " <> " +U+2277 " >< " U+2282:(C U+2283:)C -U+2282:!(C +U+2284 " !(C " +U+2285 " !)C " U+2286:(_ U+2287:)_ -U+2295:(+) -U+2297:(x) -U+2299:0. -U+229a:02 +U+2295 "(+)" # CIRCLED PLUS +U+2296 "(-)" # CIRCLED MINUS +U+2297 "(x)" # CIRCLED TIMES +U+2298 "(/)" # CIRCLED DIVISION SLASH +U+2299 "(.)" # CIRCLED DOT OPERATOR +U+229A "(o)" # CIRCLED RING OPERATOR +U+229B "(*)" # CIRCLED ASTERISK OPERATOR +U+229C "(=)" # CIRCLED EQUALS +U+229D "(-)" # CIRCLED DASH +U+229E "[+]" # SQUARED PLUS +U+229F "[-]" # SQUARED MINUS +U+22A0 "[x]" # SQUARED TIMES +U+22A1 "[.]" # SQUARED DOT OPERATOR U+22a5:-T -U+22c5:.P +U+22A7 " MODELS " # MODELS +U+22A8 " TRUE " # TRUE +U+22A9 " FORCES " # FORCES +U+22AC " !PROVES " # DOES NOT PROVE +U+22AD " NOT TRUE " # NOT TRUE +U+22AE " !FORCES " # DOES NOT FORCE +U+22B2 " NORMAL SUBGROUP OF " +U+22B3 " CONTAINS AS NORMAL SUBGROUP " +U+22B4 " NORMAL SUBGROUP OF OR EQUAL TO " +U+22B5 " CONTAINS AS NORMAL SUBGROUP OR EQUAL TO " +U+22B8 " MULTIMAP " # MULTIMAP +U+22BA " INTERCALATE " # INTERCALATE +U+22BB " XOR " # XOR +U+22BC " NAND " # NAND +U+22C5 " DOT " # DOT OPERATOR +U+22d6:<. +U+22d7:>. +U+22d8:<<< +U+22d9:>>> U+22ee::3 U+22ef:.3 U+2302:Eh +U+2307:~~ U+2308:<7 U+2309:>7 U+230a:7< @@ -1529,7 +1645,10 @@ U+2315:TR U+2318:88 U+2320:Iu U+2321:Il +U+2322::( U+2323::) +U+2324:|^| +U+2327:[X] U+2329:</ U+232a:/> U+2423:Vs @@ -1716,16 +1835,16 @@ U+25a9:RX U+25aa:sB U+25ac:SR U+25ad:Or -U+25b2:UT +U+25b2:^ U+25b3:uT -U+25b6:PR +U+25b6:|> U+25b7:Tr -U+25ba:PR -U+25bc:Dt +U+25ba:|> +U+25bc:v U+25bd:dT -U+25c0:PL +U+25c0:<| U+25c1:Tl -U+25c4:PL +U+25c4:<| U+25c6:Db U+25c7:Dw U+25ca:LZ @@ -1745,6 +1864,9 @@ U+260e:TEL U+260f:tel U+261c:<-- U+261e:--> +U+2621 "CAUTION " +U+2627:XP +U+2639::-( U+263a::-) U+263b:(-: U+263c:SU @@ -1792,6 +1914,7 @@ U+3016:(I U+3017:)I U+301c:-? U+3020:=T:) +0x20 U+303f U+3041:A5 U+3042:a5 U+3043:I5 @@ -2163,6 +2286,11 @@ U+fefa:lh. U+fefb:la- U+fefc:la. +0x21-0x7e U+ff01-U+ff5e +0x2e U+ff61 +0x22 U+ff62 U+ff63 +0x2c U+ff64 + # Symbols for C0 and C1 control characters, in case they get through... U+0000:NUL U+0001:SH @@ -2207,6 +2335,7 @@ U+007f:DT #U+0083:NH #U+0084:IN #U+0085:NL +U+0085 "\012" #U+0086:SA #U+0087:ES #U+0088:HS @@ -2236,5 +2365,5 @@ U+007f:DT # Let's try to show a question mark for character that cannot # be shown. U+fffd is used for invalid characters. -# It works, but let's stick with UHHH representatiion. - FM -#U+fffd:? +# It works, but let's stick with UHHH representation. - FM +#U+fffd "?" diff --git a/src/chrtrans/iso01_uni.tbl b/src/chrtrans/iso01_uni.tbl index 2d138ec2..5a47e2f8 100644 --- a/src/chrtrans/iso01_uni.tbl +++ b/src/chrtrans/iso01_uni.tbl @@ -52,6 +52,9 @@ C819 # # Any comments or problems, contact <John_Jenkins@taligent.com> # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# 0x20-0x7e idem 0xa0-0xff idem # iso 8859-1 special: trivial mapping to Unicode # @@ -150,7 +153,7 @@ C819 #0x7C U+007C # VERTICAL LINE #0x7D U+007D # RIGHT CURLY BRACKET #0x7E U+007E # TILDE -#0xA0 U+00A0 # NO-BREAK SPACE +0xA0 U+00A0 U+2007 # NO-BREAK SPACE #0xA1 U+00A1 # INVERTED EXCLAMATION MARK #0xA2 U+00A2 # CENT SIGN #0xA3 U+00A3 # POUND SIGN @@ -158,23 +161,23 @@ C819 #0xA5 U+00A5 # YEN SIGN #0xA6 U+00A6 # BROKEN BAR #0xA7 U+00A7 # SECTION SIGN -#0xA8 U+00A8 # DIAERESIS +0xA8 U+00A8 U+0308 # DIAERESIS #0xA9 U+00A9 # COPYRIGHT SIGN #0xAA U+00AA # FEMININE ORDINAL INDICATOR #0xAB U+00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK #0xAC U+00AC # NOT SIGN #0xAD U+00AD # SOFT HYPHEN #0xAE U+00AE # REGISTERED SIGN -#0xAF U+00AF # MACRON -#0xB0 U+00B0 # DEGREE SIGN +0xAF U+00AF U+0304 # MACRON +0xB0 U+00B0 U+030a # DEGREE SIGN #0xB1 U+00B1 # PLUS-MINUS SIGN #0xB2 U+00B2 # SUPERSCRIPT TWO #0xB3 U+00B3 # SUPERSCRIPT THREE #0xB4 U+00B4 # ACUTE ACCENT -#0xB5 U+00B5 # MICRO SIGN +0xB5 U+00B5 U+03bc # MICRO SIGN #0xB6 U+00B6 # PILCROW SIGN -#0xB7 U+00B7 # MIDDLE DOT -#0xB8 U+00B8 # CEDILLA +0xB7 U+00B7 U+0307 U+0387 U+2027 # MIDDLE DOT +0xB8 U+00B8 U+0327 # CEDILLA #0xB9 U+00B9 # SUPERSCRIPT ONE #0xBA U+00BA # MASCULINE ORDINAL INDICATOR #0xBB U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -250,4 +253,11 @@ C819 0xd0 U+0110 # Dstrok and ETH are nearly the same... -U+2297 "(\327)" +U+2218 " \260 " # RING OPERATOR +U+221b " ROOT\263 " +U+2297 "(\327)" # CIRCLED TIMES +U+2299 "(\267)" # CIRCLED DOT OPERATOR +U+229A "(\260)" # CIRCLED RING OPERATOR +U+22A0 "[\327]" # SQUARED TIMES +U+22A1 "[\267]" # SQUARED DOT OPERATOR +U+22C5 " \267 " # DOT OPERATOR diff --git a/src/chrtrans/iso02_uni.tbl b/src/chrtrans/iso02_uni.tbl index 383b4674..b245be55 100644 --- a/src/chrtrans/iso02_uni.tbl +++ b/src/chrtrans/iso02_uni.tbl @@ -46,6 +46,9 @@ C912 # # Any comments or problems, contact <John_Jenkins@taligent.com> # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# 0x20-0x7e idem # #0x20 U+0020 # SPACE @@ -145,35 +148,35 @@ C912 #0x7E U+007E # TILDE 0xA0 U+00A0 # NO-BREAK SPACE 0xA1 U+0104 # LATIN CAPITAL LETTER A WITH OGONEK -0xA2 U+02D8 # BREVE +0xA2 U+02D8 U+0306 # BREVE 0xA3 U+0141 # LATIN CAPITAL LETTER L WITH STROKE 0xA4 U+00A4 # CURRENCY SIGN 0xA5 U+013D # LATIN CAPITAL LETTER L WITH CARON 0xA6 U+015A # LATIN CAPITAL LETTER S WITH ACUTE 0xA7 U+00A7 # SECTION SIGN -0xA8 U+00A8 # DIAERESIS -0xA9 U+0160 # LATIN CAPITAL LETTER S WITH CARON +0xA8 U+00A8 U+0308 # DIAERESIS +0xA9 U+0160 U+0428 # LATIN CAPITAL LETTER S WITH CARON 0xAA U+015E # LATIN CAPITAL LETTER S WITH CEDILLA 0xAB U+0164 # LATIN CAPITAL LETTER T WITH CARON 0xAC U+0179 # LATIN CAPITAL LETTER Z WITH ACUTE 0xAD U+00AD # SOFT HYPHEN -0xAE U+017D # LATIN CAPITAL LETTER Z WITH CARON +0xAE U+017D U+0416 # LATIN CAPITAL LETTER Z WITH CARON 0xAF U+017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE -0xB0 U+00B0 # DEGREE SIGN +0xB0 U+00B0 U+030a # DEGREE SIGN 0xB1 U+0105 # LATIN SMALL LETTER A WITH OGONEK -0xB2 U+02DB # OGONEK +0xB2 U+02DB U+0328 # OGONEK 0xB3 U+0142 # LATIN SMALL LETTER L WITH STROKE 0xB4 U+00B4 # ACUTE ACCENT 0xB5 U+013E # LATIN SMALL LETTER L WITH CARON 0xB6 U+015B # LATIN SMALL LETTER S WITH ACUTE -0xB7 U+02C7 # CARON -0xB8 U+00B8 # CEDILLA -0xB9 U+0161 # LATIN SMALL LETTER S WITH CARON +0xB7 U+02C7 U+030c # CARON +0xB8 U+00B8 U+0327 # CEDILLA +0xB9 U+0161 U+0448 # LATIN SMALL LETTER S WITH CARON 0xBA U+015F # LATIN SMALL LETTER S WITH CEDILLA 0xBB U+0165 # LATIN SMALL LETTER T WITH CARON 0xBC U+017A # LATIN SMALL LETTER Z WITH ACUTE -0xBD U+02DD # DOUBLE ACUTE ACCENT -0xBE U+017E # LATIN SMALL LETTER Z WITH CARON +0xBD U+02DD U+030b # DOUBLE ACUTE ACCENT +0xBE U+017E U+0436 # LATIN SMALL LETTER Z WITH CARON 0xBF U+017C # LATIN SMALL LETTER Z WITH DOT ABOVE 0xC0 U+0154 # LATIN CAPITAL LETTER R WITH ACUTE 0xC1 U+00C1 # LATIN CAPITAL LETTER A WITH ACUTE @@ -183,7 +186,7 @@ C912 0xC5 U+0139 # LATIN CAPITAL LETTER L WITH ACUTE 0xC6 U+0106 # LATIN CAPITAL LETTER C WITH ACUTE 0xC7 U+00C7 # LATIN CAPITAL LETTER C WITH CEDILLA -0xC8 U+010C # LATIN CAPITAL LETTER C WITH CARON +0xC8 U+010C U+0427 # LATIN CAPITAL LETTER C WITH CARON 0xC9 U+00C9 # LATIN CAPITAL LETTER E WITH ACUTE 0xCA U+0118 # LATIN CAPITAL LETTER E WITH OGONEK 0xCB U+00CB # LATIN CAPITAL LETTER E WITH DIAERESIS @@ -198,7 +201,7 @@ C912 0xD4 U+00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 0xD5 U+0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 0xD6 U+00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS -0xD7 U+00D7 # MULTIPLICATION SIGN +0xD7 U+00D7 U+00b7 # MULTIPLICATION SIGN 0xD8 U+0158 # LATIN CAPITAL LETTER R WITH CARON 0xD9 U+016E # LATIN CAPITAL LETTER U WITH RING ABOVE 0xDA U+00DA # LATIN CAPITAL LETTER U WITH ACUTE @@ -215,7 +218,7 @@ C912 0xE5 U+013A # LATIN SMALL LETTER L WITH ACUTE 0xE6 U+0107 # LATIN SMALL LETTER C WITH ACUTE 0xE7 U+00E7 # LATIN SMALL LETTER C WITH CEDILLA -0xE8 U+010D # LATIN SMALL LETTER C WITH CARON +0xE8 U+010D U+02a7 U+0447 # LATIN SMALL LETTER C WITH CARON 0xE9 U+00E9 # LATIN SMALL LETTER E WITH ACUTE 0xEA U+0119 # LATIN SMALL LETTER E WITH OGONEK 0xEB U+00EB # LATIN SMALL LETTER E WITH DIAERESIS @@ -238,8 +241,15 @@ C912 0xFC U+00FC # LATIN SMALL LETTER U WITH DIAERESIS 0xFD U+00FD # LATIN SMALL LETTER Y WITH ACUTE 0xFE U+0163 # LATIN SMALL LETTER T WITH CEDILLA -0xFF U+02D9 # DOT ABOVE +0xFF U+02D9 U+0307 U+0387 # DOT ABOVE 0xd0 U+00d0 # Dstrok and ETH are nearly the same... +U+2218 " \260 " # RING OPERATOR +U+2297 "(\327)" # CIRCLED TIMES +U+2299 "(\377)" # CIRCLED DOT OPERATOR +U+229A "(\260)" # CIRCLED RING OPERATOR +U+22A0 "[\327]" # SQUARED TIMES +U+22A1 "[\377]" # SQUARED DOT OPERATOR +U+22C5 " \377 " # DOT OPERATOR diff --git a/src/chrtrans/iso05_uni.tbl b/src/chrtrans/iso05_uni.tbl index 1436a687..7eeba113 100644 --- a/src/chrtrans/iso05_uni.tbl +++ b/src/chrtrans/iso05_uni.tbl @@ -46,6 +46,9 @@ C915 # # Any comments or problems, contact <John_Jenkins@taligent.com> # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# 0x20-0x7e idem # #0x20 U+0020 # SPACE @@ -149,8 +152,8 @@ C915 0xA3 U+0403 # CYRILLIC CAPITAL LETTER GJE 0xA4 U+0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE 0xA5 U+0405 # CYRILLIC CAPITAL LETTER DZE -0xA6 U+0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -0xA7 U+0407 # CYRILLIC CAPITAL LETTER YI +0xA6 U+0406 U+0130 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 U+0407 U+03AA # CYRILLIC CAPITAL LETTER YI 0xA8 U+0408 # CYRILLIC CAPITAL LETTER JE 0xA9 U+0409 # CYRILLIC CAPITAL LETTER LJE 0xAA U+040A # CYRILLIC CAPITAL LETTER NJE @@ -162,28 +165,28 @@ C915 0xB0 U+0410 # CYRILLIC CAPITAL LETTER A 0xB1 U+0411 # CYRILLIC CAPITAL LETTER BE 0xB2 U+0412 # CYRILLIC CAPITAL LETTER VE -0xB3 U+0413 # CYRILLIC CAPITAL LETTER GHE +0xB3 U+0413 U+0393 # CYRILLIC CAPITAL LETTER GHE 0xB4 U+0414 # CYRILLIC CAPITAL LETTER DE 0xB5 U+0415 # CYRILLIC CAPITAL LETTER IE -0xB6 U+0416 # CYRILLIC CAPITAL LETTER ZHE +0xB6 U+0416 U+017d # CYRILLIC CAPITAL LETTER ZHE 0xB7 U+0417 # CYRILLIC CAPITAL LETTER ZE 0xB8 U+0418 # CYRILLIC CAPITAL LETTER I 0xB9 U+0419 # CYRILLIC CAPITAL LETTER SHORT I 0xBA U+041A # CYRILLIC CAPITAL LETTER KA -0xBB U+041B # CYRILLIC CAPITAL LETTER EL +0xBB U+041B U+039b # CYRILLIC CAPITAL LETTER EL 0xBC U+041C # CYRILLIC CAPITAL LETTER EM 0xBD U+041D # CYRILLIC CAPITAL LETTER EN 0xBE U+041E # CYRILLIC CAPITAL LETTER O -0xBF U+041F # CYRILLIC CAPITAL LETTER PE +0xBF U+041F U+03a0 # CYRILLIC CAPITAL LETTER PE 0xC0 U+0420 # CYRILLIC CAPITAL LETTER ER 0xC1 U+0421 # CYRILLIC CAPITAL LETTER ES 0xC2 U+0422 # CYRILLIC CAPITAL LETTER TE 0xC3 U+0423 # CYRILLIC CAPITAL LETTER U -0xC4 U+0424 # CYRILLIC CAPITAL LETTER EF +0xC4 U+0424 U+03a6 # CYRILLIC CAPITAL LETTER EF 0xC5 U+0425 # CYRILLIC CAPITAL LETTER HA 0xC6 U+0426 # CYRILLIC CAPITAL LETTER TSE -0xC7 U+0427 # CYRILLIC CAPITAL LETTER CHE -0xC8 U+0428 # CYRILLIC CAPITAL LETTER SHA +0xC7 U+0427 U+010c # CYRILLIC CAPITAL LETTER CHE +0xC8 U+0428 U+0160 # CYRILLIC CAPITAL LETTER SHA 0xC9 U+0429 # CYRILLIC CAPITAL LETTER SHCHA 0xCA U+042A # CYRILLIC CAPITAL LETTER HARD SIGN 0xCB U+042B # CYRILLIC CAPITAL LETTER YERU @@ -197,28 +200,28 @@ C915 0xD3 U+0433 # CYRILLIC SMALL LETTER GHE 0xD4 U+0434 # CYRILLIC SMALL LETTER DE 0xD5 U+0435 # CYRILLIC SMALL LETTER IE -0xD6 U+0436 # CYRILLIC SMALL LETTER ZHE +0xD6 U+0436 U+017e # CYRILLIC SMALL LETTER ZHE 0xD7 U+0437 # CYRILLIC SMALL LETTER ZE 0xD8 U+0438 # CYRILLIC SMALL LETTER I 0xD9 U+0439 # CYRILLIC SMALL LETTER SHORT I 0xDA U+043A # CYRILLIC SMALL LETTER KA -0xDB U+043B # CYRILLIC SMALL LETTER EL +0xDB U+043B U+03bb # CYRILLIC SMALL LETTER EL 0xDC U+043C # CYRILLIC SMALL LETTER EM 0xDD U+043D # CYRILLIC SMALL LETTER EN 0xDE U+043E # CYRILLIC SMALL LETTER O -0xDF U+043F # CYRILLIC SMALL LETTER PE +0xDF U+043F U+03c0 # CYRILLIC SMALL LETTER PE 0xE0 U+0440 # CYRILLIC SMALL LETTER ER 0xE1 U+0441 # CYRILLIC SMALL LETTER ES 0xE2 U+0442 # CYRILLIC SMALL LETTER TE 0xE3 U+0443 # CYRILLIC SMALL LETTER U -0xE4 U+0444 # CYRILLIC SMALL LETTER EF +0xE4 U+0444 U+03c6 # CYRILLIC SMALL LETTER EF 0xE5 U+0445 # CYRILLIC SMALL LETTER HA 0xE6 U+0446 # CYRILLIC SMALL LETTER TSE -0xE7 U+0447 # CYRILLIC SMALL LETTER CHE -0xE8 U+0448 # CYRILLIC SMALL LETTER SHA +0xE7 U+0447 U+010d # CYRILLIC SMALL LETTER CHE +0xE8 U+0448 U+0161 # CYRILLIC SMALL LETTER SHA 0xE9 U+0449 # CYRILLIC SMALL LETTER SHCHA 0xEA U+044A # CYRILLIC SMALL LETTER HARD SIGN -0xEB U+044B # CYRILLIC SMALL LETTER YERU +0xEB U+044B U+0131 # CYRILLIC SMALL LETTER YERU 0xEC U+044C # CYRILLIC SMALL LETTER SOFT SIGN 0xED U+044D # CYRILLIC SMALL LETTER E 0xEE U+044E # CYRILLIC SMALL LETTER YU @@ -230,7 +233,7 @@ C915 0xF4 U+0454 # CYRILLIC SMALL LETTER UKRAINIAN IE 0xF5 U+0455 # CYRILLIC SMALL LETTER DZE 0xF6 U+0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -0xF7 U+0457 # CYRILLIC SMALL LETTER YI +0xF7 U+0457 U+03CA # CYRILLIC SMALL LETTER YI 0xF8 U+0458 # CYRILLIC SMALL LETTER JE 0xF9 U+0459 # CYRILLIC SMALL LETTER LJE 0xFA U+045A # CYRILLIC SMALL LETTER NJE @@ -240,3 +243,7 @@ C915 0xFE U+045E # CYRILLIC SMALL LETTER SHORT U 0xFF U+045F # CYRILLIC SMALL LETTER DZHE +U+0400 "`\265" +U+040d "`\270" +U+0450 "`\325" +U+045d "`\330" diff --git a/src/chrtrans/iso07_uni.tbl b/src/chrtrans/iso07_uni.tbl index 7e9063ec..458c2389 100644 --- a/src/chrtrans/iso07_uni.tbl +++ b/src/chrtrans/iso07_uni.tbl @@ -46,6 +46,9 @@ C813 # # Any comments or problems, contact <John_Jenkins@taligent.com> # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# 0x20-0x7e idem # #0x20 U+0020 # SPACE @@ -161,20 +164,20 @@ C813 0xB3 U+00B3 # SUPERSCRIPT THREE 0xB4 U+0384 # GREEK TONOS 0xB5 U+0385 # GREEK DIALYTIKA TONOS -0xB6 U+0386 # GREEK CAPITAL LETTER ALPHA WITH TONOS -0xB7 U+00B7 # MIDDLE DOT -0xB8 U+0388 # GREEK CAPITAL LETTER EPSILON WITH TONOS -0xB9 U+0389 # GREEK CAPITAL LETTER ETA WITH TONOS -0xBA U+038A # GREEK CAPITAL LETTER IOTA WITH TONOS +0xB6 U+0386 U+1fbb # GREEK CAPITAL LETTER ALPHA WITH TONOS +0xB7 U+00B7 U+0307 U+0387 U+2027 # MIDDLE DOT +0xB8 U+0388 U+1fc9 # GREEK CAPITAL LETTER EPSILON WITH TONOS +0xB9 U+0389 U+1fcb # GREEK CAPITAL LETTER ETA WITH TONOS +0xBA U+038A U+1fdb # GREEK CAPITAL LETTER IOTA WITH TONOS 0xBB U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -0xBC U+038C # GREEK CAPITAL LETTER OMICRON WITH TONOS +0xBC U+038C U+1ff9 # GREEK CAPITAL LETTER OMICRON WITH TONOS 0xBD U+00BD # VULGAR FRACTION ONE HALF -0xBE U+038E # GREEK CAPITAL LETTER UPSILON WITH TONOS -0xBF U+038F # GREEK CAPITAL LETTER OMEGA WITH TONOS +0xBE U+038E U+1feb # GREEK CAPITAL LETTER UPSILON WITH TONOS +0xBF U+038F U+1ffb # GREEK CAPITAL LETTER OMEGA WITH TONOS 0xC0 U+0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0xC1 U+0391 # GREEK CAPITAL LETTER ALPHA 0xC2 U+0392 # GREEK CAPITAL LETTER BETA -0xC3 U+0393 # GREEK CAPITAL LETTER GAMMA +0xC3 U+0393 U+0413 # GREEK CAPITAL LETTER GAMMA 0xC4 U+0394 # GREEK CAPITAL LETTER DELTA 0xC5 U+0395 # GREEK CAPITAL LETTER EPSILON 0xC6 U+0396 # GREEK CAPITAL LETTER ZETA @@ -182,39 +185,39 @@ C813 0xC8 U+0398 # GREEK CAPITAL LETTER THETA 0xC9 U+0399 # GREEK CAPITAL LETTER IOTA 0xCA U+039A # GREEK CAPITAL LETTER KAPPA -0xCB U+039B # GREEK CAPITAL LETTER LAMDA +0xCB U+039B U+041b # GREEK CAPITAL LETTER LAMDA 0xCC U+039C # GREEK CAPITAL LETTER MU 0xCD U+039D # GREEK CAPITAL LETTER NU 0xCE U+039E # GREEK CAPITAL LETTER XI 0xCF U+039F # GREEK CAPITAL LETTER OMICRON -0xD0 U+03A0 # GREEK CAPITAL LETTER PI +0xD0 U+03A0 U+041f # GREEK CAPITAL LETTER PI 0xD1 U+03A1 # GREEK CAPITAL LETTER RHO 0xD3 U+03A3 # GREEK CAPITAL LETTER SIGMA 0xD4 U+03A4 # GREEK CAPITAL LETTER TAU 0xD5 U+03A5 # GREEK CAPITAL LETTER UPSILON -0xD6 U+03A6 # GREEK CAPITAL LETTER PHI -0xD7 U+03A7 # GREEK CAPITAL LETTER CHI +0xD6 U+03A6 U+0424 # GREEK CAPITAL LETTER PHI +0xD7 U+03A7 U+0425 # GREEK CAPITAL LETTER CHI 0xD8 U+03A8 # GREEK CAPITAL LETTER PSI 0xD9 U+03A9 # GREEK CAPITAL LETTER OMEGA 0xDA U+03AA # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 0xDB U+03AB # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA -0xDC U+03AC # GREEK SMALL LETTER ALPHA WITH TONOS -0xDD U+03AD # GREEK SMALL LETTER EPSILON WITH TONOS -0xDE U+03AE # GREEK SMALL LETTER ETA WITH TONOS -0xDF U+03AF # GREEK SMALL LETTER IOTA WITH TONOS +0xDC U+03AC U+1f71 # GREEK SMALL LETTER ALPHA WITH TONOS +0xDD U+03AD U+1f73 # GREEK SMALL LETTER EPSILON WITH TONOS +0xDE U+03AE U+1f75 # GREEK SMALL LETTER ETA WITH TONOS +0xDF U+03AF U+1f77 # GREEK SMALL LETTER IOTA WITH TONOS 0xE0 U+03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0xE1 U+03B1 # GREEK SMALL LETTER ALPHA 0xE2 U+03B2 # GREEK SMALL LETTER BETA -0xE3 U+03B3 # GREEK SMALL LETTER GAMMA -0xE4 U+03B4 # GREEK SMALL LETTER DELTA +0xE3 U+03B3 U+0263 # GREEK SMALL LETTER GAMMA +0xE4 U+03B4 U+00f0 # GREEK SMALL LETTER DELTA 0xE5 U+03B5 # GREEK SMALL LETTER EPSILON 0xE6 U+03B6 # GREEK SMALL LETTER ZETA 0xE7 U+03B7 # GREEK SMALL LETTER ETA 0xE8 U+03B8 # GREEK SMALL LETTER THETA -0xE9 U+03B9 # GREEK SMALL LETTER IOTA +0xE9 U+03B9 U+0131 # GREEK SMALL LETTER IOTA 0xEA U+03BA # GREEK SMALL LETTER KAPPA 0xEB U+03BB # GREEK SMALL LETTER LAMDA -0xEC U+03BC # GREEK SMALL LETTER MU +0xEC U+03BC U+00b5 # GREEK SMALL LETTER MU 0xED U+03BD # GREEK SMALL LETTER NU 0xEE U+03BE # GREEK SMALL LETTER XI 0xEF U+03BF # GREEK SMALL LETTER OMICRON @@ -223,14 +226,20 @@ C813 0xF2 U+03C2 # GREEK SMALL LETTER FINAL SIGMA 0xF3 U+03C3 # GREEK SMALL LETTER SIGMA 0xF4 U+03C4 # GREEK SMALL LETTER TAU -0xF5 U+03C5 # GREEK SMALL LETTER UPSILON +0xF5 U+03C5 U+028a # GREEK SMALL LETTER UPSILON 0xF6 U+03C6 # GREEK SMALL LETTER PHI 0xF7 U+03C7 # GREEK SMALL LETTER CHI 0xF8 U+03C8 # GREEK SMALL LETTER PSI 0xF9 U+03C9 # GREEK SMALL LETTER OMEGA 0xFA U+03CA # GREEK SMALL LETTER IOTA WITH DIALYTIKA -0xFB U+03CB # GREEK SMALL LETTER UPSILON WITH DIALYTIKA -0xFC U+03CC # GREEK SMALL LETTER OMICRON WITH TONOS -0xFD U+03CD # GREEK SMALL LETTER UPSILON WITH TONOS -0xFE U+03CE # GREEK SMALL LETTER OMEGA WITH TONOS +0xFB U+03CB U+00fc # GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xFC U+03CC U+1f79 # GREEK SMALL LETTER OMICRON WITH TONOS +0xFD U+03CD U+1f7b # GREEK SMALL LETTER UPSILON WITH TONOS +0xFE U+03CE U+1f7d # GREEK SMALL LETTER OMEGA WITH TONOS +U+2218 " \260 " # RING OPERATOR +U+2209 " !\345 " +U+221b " ROOT\263 " +U+229A "(\260)" # CIRCLED RING OPERATOR +U+02a4 "d\346" +U+20af "\304\361\367" diff --git a/src/chrtrans/iso09_uni.tbl b/src/chrtrans/iso09_uni.tbl index 0b93209c..5dc9660c 100644 --- a/src/chrtrans/iso09_uni.tbl +++ b/src/chrtrans/iso09_uni.tbl @@ -46,7 +46,11 @@ C920 # # Any comments or problems, contact <John_Jenkins@taligent.com> # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# 0x20-0x7e idem +0x49 U+042b # #0x20 U+0020 # SPACE #0x21 U+0021 # EXCLAMATION MARK @@ -151,23 +155,23 @@ C920 0xA5 U+00A5 # YEN SIGN 0xA6 U+00A6 # BROKEN BAR 0xA7 U+00A7 # SECTION SIGN -0xA8 U+00A8 # DIAERESIS +0xA8 U+00A8 U+0308 # DIAERESIS 0xA9 U+00A9 # COPYRIGHT SIGN 0xAA U+00AA # FEMININE ORDINAL INDICATOR 0xAB U+00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0xAC U+00AC # NOT SIGN 0xAD U+00AD # SOFT HYPHEN 0xAE U+00AE # REGISTERED SIGN -0xAF U+00AF # MACRON -0xB0 U+00B0 # DEGREE SIGN +0xAF U+00AF U+0304 # MACRON +0xB0 U+00B0 U+030a # DEGREE SIGN 0xB1 U+00B1 # PLUS-MINUS SIGN 0xB2 U+00B2 # SUPERSCRIPT TWO 0xB3 U+00B3 # SUPERSCRIPT THREE 0xB4 U+00B4 # ACUTE ACCENT -0xB5 U+00B5 # MICRO SIGN +0xB5 U+00B5 U+03bc # MICRO SIGN 0xB6 U+00B6 # PILCROW SIGN -0xB7 U+00B7 # MIDDLE DOT -0xB8 U+00B8 # CEDILLA +0xB7 U+00B7 U+0307 U+0387 # MIDDLE DOT +0xB8 U+00B8 U+0327 # CEDILLA 0xB9 U+00B9 # SUPERSCRIPT ONE 0xBA U+00BA # MASCULINE ORDINAL INDICATOR 0xBB U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -204,8 +208,8 @@ C920 0xDA U+00DA # LATIN CAPITAL LETTER U WITH ACUTE 0xDB U+00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 0xDC U+00DC # LATIN CAPITAL LETTER U WITH DIAERESIS -0xDD U+0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE -0xDE U+015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xDD U+0130 U+0418 U+0406 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0xDE U+015E U+0428 # LATIN CAPITAL LETTER S WITH CEDILLA 0xDF U+00DF # LATIN SMALL LETTER SHARP S 0xE0 U+00E0 # LATIN SMALL LETTER A WITH GRAVE 0xE1 U+00E1 # LATIN SMALL LETTER A WITH ACUTE @@ -236,7 +240,15 @@ C920 0xFA U+00FA # LATIN SMALL LETTER U WITH ACUTE 0xFB U+00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX 0xFC U+00FC # LATIN SMALL LETTER U WITH DIAERESIS -0xFD U+0131 # LATIN SMALL LETTER DOTLESS I -0xFE U+015F # LATIN SMALL LETTER S WITH CEDILLA +0xFD U+0131 U+03b9 U+044b # LATIN SMALL LETTER DOTLESS I +0xFE U+015F U+0448 # LATIN SMALL LETTER S WITH CEDILLA 0xFF U+00FF # LATIN SMALL LETTER Y WITH DIAERESIS +U+2218 " \260 " # RING OPERATOR +U+221b " ROOT\263 " +U+2297 "(\327)" # CIRCLED TIMES +U+2299 "(\267)" # CIRCLED DOT OPERATOR +U+229A "(\260)" # CIRCLED RING OPERATOR +U+22A0 "[\327]" # SQUARED TIMES +U+22A1 "[\267]" # SQUARED DOT OPERATOR +U+22C5 " \267 " # DOT OPERATOR diff --git a/src/chrtrans/koi8r_uni.tbl b/src/chrtrans/koi8r_uni.tbl index 1378eed9..8bf4001a 100644 --- a/src/chrtrans/koi8r_uni.tbl +++ b/src/chrtrans/koi8r_uni.tbl @@ -11,6 +11,9 @@ C878 # Based on a table received from "Glenn E. Thobe" <thobe@lafn.org> # (verified against RFC1489). # +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw +# #hex unicode # description #--- U+---- # --------------- 0x80 U+2500 # FORMS LIGHT HORIZONTAL @@ -43,7 +46,7 @@ C878 0x9B U+2321 # BOTTOM HALF INTEGRAL 0x9C U+00B0 # DEGREE SIGN 0x9D U+00B2 # SUPERSCRIPT DIGIT TWO -0x9E U+00B7 # MIDDLE DOT +0x9E U+00B7 U+2027 # MIDDLE DOT 0x9F U+00F7 # DIVISION SIGN 0xA0 U+2550 # FORMS DOUBLE HORIZONTAL 0xA1 U+2551 # FORMS DOUBLE VERTICAL @@ -83,31 +86,31 @@ C878 0xC3 U+0446 # SMA TSE 0xC4 U+0434 # SMA DE 0xC5 U+0435 # SMA IE -0xC6 U+0444 # SMA EF +0xC6 U+0444 U+03c6 # SMA EF 0xC7 U+0433 # SMA GE 0xC8 U+0445 # SMA KHA 0xC9 U+0438 # SMA II 0xCA U+0439 # SMA SHORT II 0xCB U+043A # SMA KA -0xCC U+043B # SMA EL +0xCC U+043B U+03bb # SMA EL 0xCD U+043C # SMA EM 0xCE U+043D # SMA EN 0xCF U+043E # SMA O -0xD0 U+043F # SMA PE +0xD0 U+043F U+03c0 # SMA PE 0xD1 U+044F # SMA IA 0xD2 U+0440 # SMA ER 0xD3 U+0441 # SMA ES 0xD4 U+0442 # SMA TE 0xD5 U+0443 # SMA U -0xD6 U+0436 # SMA ZHE +0xD6 U+0436 U+017e # SMA ZHE 0xD7 U+0432 # SMA VE 0xD8 U+044C # SMA SOFT SIGN -0xD9 U+044B # SMA YERI +0xD9 U+044B U+0131 # SMA YERI 0xDA U+0437 # SMA ZE -0xDB U+0448 # SMA SHA +0xDB U+0448 U+0161 # SMA SHA 0xDC U+044D # SMA REVERSED E 0xDD U+0449 # SMA SHCHA -0xDE U+0447 # SMA CHE +0xDE U+0447 U+010d # SMA CHE 0xDF U+044A # SMA HARD SIGN 0xE0 U+042E # CAP IU 0xE1 U+0410 # CAP A @@ -115,30 +118,30 @@ C878 0xE3 U+0426 # CAP TSE 0xE4 U+0414 # CAP DE 0xE5 U+0415 # CAP IE -0xE6 U+0424 # CAP EF -0xE7 U+0413 # CAP GE +0xE6 U+0424 U+03a6 # CAP EF +0xE7 U+0413 U+0393 # CAP GE 0xE8 U+0425 # CAP KHA 0xE9 U+0418 # CAP II 0xEA U+0419 # CAP SHORT II 0xEB U+041A # CAP KA -0xEC U+041B # CAP EL +0xEC U+041B U+039b # CAP EL 0xED U+041C # CAP EM 0xEE U+041D # CAP EN 0xEF U+041E # CAP O -0xF0 U+041F # CAP PE +0xF0 U+041F U+03a0 # CAP PE 0xF1 U+042F # CAP IA 0xF2 U+0420 # CAP ER 0xF3 U+0421 # CAP ES 0xF4 U+0422 # CAP TE 0xF5 U+0423 # CAP U -0xF6 U+0416 # CAP ZHE +0xF6 U+0416 U+017d # CAP ZHE 0xF7 U+0412 # CAP VE 0xF8 U+042C # CAP SOFT SIGN 0xF9 U+042B # CAP YERI 0xFA U+0417 # CAP ZE -0xFB U+0428 # CAP SHA +0xFB U+0428 U+0160 # CAP SHA 0xFC U+042D # CAP REVERSED E 0xFD U+0429 # CAP SHCHA -0xFE U+0427 # CAP CHE +0xFE U+0427 U+010c # CAP CHE 0xFF U+042A # CAP HARD SIGN diff --git a/src/chrtrans/mac_uni.tbl b/src/chrtrans/mac_uni.tbl index ea76d078..c2457e7b 100644 --- a/src/chrtrans/mac_uni.tbl +++ b/src/chrtrans/mac_uni.tbl @@ -80,6 +80,8 @@ OMacintosh (8 bit) # interpreted (if at all) as the control codes DC1-DC4. # ################## +# Lines with more than one Unicode (U+XXXX) value contain additional +# replacement mappings added for lynx. - kw #0x20 U+0020 # SPACE #0x21 U+0021 # EXCLAMATION MARK @@ -276,7 +278,7 @@ OMacintosh (8 bit) 0xDE U+FB01 # LATIN SMALL LIGATURE FI 0xDF U+FB02 # LATIN SMALL LIGATURE FL 0xE0 U+2021 # DOUBLE DAGGER -0xE1 U+00B7 # MIDDLE DOT +0xE1 U+00B7 U+0307 U+0387 U+2027 # MIDDLE DOT 0xE2 U+201A # SINGLE LOW-9 QUOTATION MARK 0xE3 U+201E # DOUBLE LOW-9 QUOTATION MARK 0xE4 U+2030 # PER MILLE SIGN diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index 8c333d90..9ad80ecb 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -89,7 +89,7 @@ PUBLIC int strncasecomp ARGS3( CONST char *q = b; for (p = a, q = b; ; p++, q++) { - int diff; + int diff; if (p == (a+n)) return 0; /* Match up to n characters */ if (!(*p && *q)) @@ -136,6 +136,8 @@ PRIVATE int Raw_found = 0; /* whether explicit R directive found */ PRIVATE int CodePage = 0; PRIVATE int CodePage_found = 0; /* whether explicit C directive found */ +#define MAX_UNIPAIRS 2500 + PRIVATE void addpair_str ARGS2( char *, str, int, un) @@ -147,8 +149,8 @@ PRIVATE void addpair_str ARGS2( /* * Initialize the map for replacement strings. */ - themap_str.entries = - (struct unipair_str *) malloc (2000 * sizeof (struct unipair_str)); + themap_str.entries = (struct unipair_str *) malloc (MAX_UNIPAIRS + * sizeof (struct unipair_str)); if (!themap_str.entries) { fprintf(stderr, "%s: Out of memory\n", tblname); @@ -169,9 +171,10 @@ PRIVATE void addpair_str ARGS2( /* * Add to list. */ - if (themap_str.entry_ct > 1999) { + if (themap_str.entry_ct > MAX_UNIPAIRS-1) { fprintf(stderr, - "ERROR: Only 2000 unicode replacement strings permitted!\n"); + "ERROR: Only %d unicode replacement strings permitted!\n", + MAX_UNIPAIRS); done(EX_DATAERR); } themap_str.entries[themap_str.entry_ct].unicode = un; @@ -187,7 +190,7 @@ PRIVATE void addpair ARGS2( { int i; - if (!Raw_found) { /* enc not (yet) explicitly given with 'R' */ + if (!Raw_found) { /* enc not (yet) explicitly given with 'R' */ if (fp >= 128) { if (RawOrEnc != UCT_ENC_8BIT && RawOrEnc <= UCT_ENC_8859) { if (fp < 160) { /* cannot be 8859 */ @@ -391,7 +394,7 @@ PUBLIC int main ARGS2( } p++; while (*p == ' ' || *p == '\t') { - p++; + p++; } RawOrEnc = strtol(p,0,10); Raw_found = 1; @@ -400,7 +403,7 @@ PUBLIC int main ARGS2( /* * Is this the default table? */ - case 'D': + case 'D': if (p[1] == 'e' || p[1] == 'E') { buffer[sizeof(buffer) - 1] = '\0'; if (!strncasecomp(p, "Default", 7)) { @@ -417,7 +420,7 @@ PUBLIC int main ARGS2( /* * Is this the default table? */ - case 'F': + case 'F': if (p[1] == 'a' || p[1] == 'A') { buffer[sizeof(buffer) - 1] = '\0'; if (!strncasecomp(p, "FallBack", 8)) { @@ -477,7 +480,7 @@ PUBLIC int main ARGS2( } p++; while (*p == ' ' || *p == '\t') { - p++; + p++; } CodePage = strtol(p,0,10); CodePage_found = 1; @@ -587,7 +590,7 @@ PUBLIC int main ARGS2( if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); done(EX_DATAERR); - } + } p = p1; while (*p == ' ' || *p == '\t') { @@ -601,7 +604,7 @@ PUBLIC int main ARGS2( done(EX_DATAERR); } p = p1; - } else { + } else { fp1 = 0; } @@ -643,7 +646,7 @@ PUBLIC int main ARGS2( fprintf(stderr, " there should be a Unicode range.\n"); done(EX_DATAERR); - } + } p++; un1 = getunicode(&p); if (un0 < 0 || un1 < 0) { @@ -651,7 +654,7 @@ PUBLIC int main ARGS2( "%s: Bad Unicode range corresponding to font position range 0x%x-0x%x\n", tblname, fp0, fp1); done(EX_DATAERR); - } + } if (un1 - un0 != fp1 - fp0) { fprintf(stderr, "%s: Unicode range U+%x-U+%x not of the same length", @@ -660,7 +663,7 @@ PUBLIC int main ARGS2( " as font position range 0x%x-0x%x\n", fp0, fp1); done(EX_DATAERR); - } + } for (i = fp0; i <= fp1; i++) { addpair(i,un0-fp0+i); } @@ -752,8 +755,8 @@ PUBLIC int main ARGS2( this_isDefaultMap = !strncmp(this_MIMEcharset,"iso-8859-1", 10); } fprintf(stderr, - "makeuctb: %s: %stranslation map", - this_MIMEcharset, (this_isDefaultMap ? "default " : "")); + "makeuctb: %s: %stranslation map", + this_MIMEcharset, (this_isDefaultMap ? "default " : "")); if (this_isDefaultMap == 1) { *id_append = '\0'; } else { @@ -808,7 +811,7 @@ static CONST u8 dfont_unicount%s[%d] = \n\ if (nuni) { fprintf(chdr, "\nstatic CONST u16 dfont_unitable%s[%d] = \n{\n\t", - id_append, nuni); + id_append, nuni); } else { fprintf(chdr, "\nstatic CONST u16 dfont_unitable%s[1]; /* dummy */\n", id_append); } @@ -841,8 +844,8 @@ static struct unipair_str repl_map%s[%d] = \n\ for (i = 0; i < themap_str.entry_ct; i++) { fprintf(chdr, "{0x%x,\"%s\"}", - themap_str.entries[i].unicode, - themap_str.entries[i].replace_str); + themap_str.entries[i].unicode, + themap_str.entries[i].replace_str); if (i == (themap_str.entry_ct - 1)) { fprintf(chdr, "\n};\n"); } else if ((i % 4) == 3) { |