diff options
Diffstat (limited to 'src/chrtrans')
27 files changed, 2077 insertions, 224 deletions
diff --git a/src/chrtrans/Makefile.old b/src/chrtrans/Makefile.old index 3b75ab08..7e6c9a99 100644 --- a/src/chrtrans/Makefile.old +++ b/src/chrtrans/Makefile.old @@ -22,26 +22,36 @@ FONTMAP_INC = iso01_uni.h# default, if not set by recursive call CHRTR= -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)viscii_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp1250_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)koi8r_uni.h \ $(CHRTR)iso06_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso07_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp1253_uni.h \ $(CHRTR)iso08_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1255_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \ $(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ @@ -57,9 +67,28 @@ makeuctb: makeuctb.c UCkd.h .tbl.h: ./makeuctb $*.tbl > $@ +def7_uni.h: def7_uni.tbl makeuctb iso01_uni.h: iso01_uni.tbl makeuctb iso02_uni.h: iso02_uni.tbl makeuctb -def7_uni.h: def7_uni.tbl makeuctb +cp1252_uni.h: cp1252_uni.tbl makeuctb +dmcs_uni.h: dmcs_uni.tbl makeuctb +mac_uni.h: mac_uni.tbl makeuctb +next_uni.h: next_uni.tbl makeuctb +koi8r_uni.h: koi8r_uni.tbl makeuctb +viscii_uni.h: viscii_uni.tbl makeuctb +cp437_uni.h: cp437_uni.tbl makeuctb +cp850_uni.h: cp850_uni.tbl makeuctb +cp852_uni.h: cp852_uni.tbl makeuctb +cp866_uni.h: cp866_uni.tbl makeuctb +cp737_uni.h: cp737_uni.tbl makeuctb +cp869_uni.h: cp869_uni.tbl makeuctb +cp864_uni.h: cp864_uni.tbl 
makeuctb +cp862_uni.h: cp862_uni.tbl makeuctb +cp1250_uni.h: cp1250_uni.tbl makeuctb +cp1251_uni.h: cp1251_uni.tbl makeuctb +cp1253_uni.h: cp1253_uni.tbl makeuctb +cp1255_uni.h: cp1255_uni.tbl makeuctb +cp1256_uni.h: cp1256_uni.tbl makeuctb iso03_uni.h: iso03_uni.tbl makeuctb iso04_uni.h: iso04_uni.tbl makeuctb iso05_uni.h: iso05_uni.tbl makeuctb @@ -68,17 +97,10 @@ iso07_uni.h: iso07_uni.tbl makeuctb iso08_uni.h: iso08_uni.tbl makeuctb iso09_uni.h: iso09_uni.tbl makeuctb iso10_uni.h: iso10_uni.tbl makeuctb -koi8r_uni.h: koi8r_uni.tbl makeuctb -cp437_uni.h: cp437_uni.tbl makeuctb -cp850_uni.h: cp850_uni.tbl makeuctb -cp852_uni.h: cp852_uni.tbl makeuctb -cp1250_uni.h: cp1250_uni.tbl makeuctb -cp1251_uni.h: cp1251_uni.tbl makeuctb -cp1252_uni.h: cp1252_uni.tbl makeuctb utf8_uni.h: utf8_uni.tbl makeuctb +rfc_suni.h: rfc_suni.tbl makeuctb mnemonic_suni.h: mnemonic_suni.tbl makeuctb mnem_suni.h: mnem_suni.tbl makeuctb -rfc_suni.h: rfc_suni.tbl makeuctb clean: rm -f makeuctb *.o *uni.h diff --git a/src/chrtrans/README.format b/src/chrtrans/README.format index 0ec556a2..8c21714c 100644 --- a/src/chrtrans/README.format +++ b/src/chrtrans/README.format @@ -34,12 +34,15 @@ b) directives: The name for this charset in MIME syntax (one word with digits and some other non-letters allowed, should be IANA registered) Default - This is the default (fallback) translation table, it will be used - for Unicode -> 8bit (or 7bit) translation if no translation is found - in the specific table. + If "Y[es]" or "1", this is the default (fallback) translation table, + it will be used for Unicode -> 8bit (or 7bit) translation if no + translation is found in the specific table. FallBack Whether to use the default table if no translation is found in - this table. Normally fallback is used, "FallBack NO" disables it. + this table. Normally fallback is used, "FallBack NO" or "FallBack 0" + disables it (actually, any value other than "FallBack Y[es]" or + "FallBack 1" disables it). 
+ RawOrEnc a number which flags some special property (encoding) for this charset [see utf8.uni for example, see UCDefs.h for details]. @@ -103,7 +106,7 @@ d) string replacement definitions: * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> * and <replace> any string not containing '\n' or '\0', taken verbatim - * and <C replace> any string, with backslash having the usual C meaning + * and <C replace> any string, with backslash having the usual C meaning. Motivation: diff --git a/src/chrtrans/build-chrtrans.com b/src/chrtrans/build-chrtrans.com index 23c9a2ae..5ddb2590 100644 --- a/src/chrtrans/build-chrtrans.com +++ b/src/chrtrans/build-chrtrans.com @@ -44,13 +44,13 @@ $ THEN $ CHRcompiler := "GNUC" $ v1 = f$verify(1) $! GNUC: -$ cc := gcc 'cc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) +$ cc := gcc 'CHRcc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) $ v1 = 'f$verify(0)' $ ELSE $ CHRcompiler := "VAXC" $ v1 = f$verify(1) $! VAXC: -$ cc := cc 'cc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) +$ cc := cc 'CHRcc_opts'/INCLUDE=([-],[--],[--.WWW.Library.Implementation]) $ v1 = 'f$verify(0)' $ ENDIF $ ENDIF @@ -65,7 +65,7 @@ $ v1 = f$verify(1) $! $! Link the Lynx [.SRC.CHRTRANS]makeuctb module. $! -$ link/exe=makeuctb.exe'link_opts' makeuctb, - +$ link/exe=makeuctb.exe'CHRlink_opts' makeuctb, - sys$disk:[-]'CHRcompiler'.opt/opt $ v1 = 'f$verify(0)' $! @@ -76,54 +76,74 @@ $! $! Create the Lynx [.SRC.CHRTRANS] header files. $! $ makeuctb := $'CHRwhere'makeuctb -$ define/user sys$output 'CHRwhere'iso01_uni.h +$ define/user sys$output 'CHRwhere'iso01_uni.h !ISO Latin 1 $ makeuctb iso01_uni.tbl -$ define/user sys$output 'CHRwhere'iso02_uni.h -$ makeuctb iso02_uni.tbl -$ define/user sys$output 'CHRwhere'def7_uni.h +$ define/user sys$output 'CHRwhere'cp850_uni.h ! cp850 +$ makeuctb cp850_uni.tbl +$ define/user sys$output 'CHRwhere'cp1252_uni.h +$ makeuctb cp1252_uni.tbl +$ define/user sys$output 'CHRwhere'cp437_uni.h ! 
cp437 +$ makeuctb cp437_uni.tbl +$ define/user sys$output 'CHRwhere'dmcs_uni.h !DEC Multinational +$ makeuctb dmcs_uni.tbl +$ define/user sys$output 'CHRwhere'mac_uni.h !Macintosh (8 bit) +$ makeuctb mac_uni.tbl +$ define/user sys$output 'CHRwhere'next_uni.h !NeXT character set +$ makeuctb next_uni.tbl +$ define/user sys$output 'CHRwhere'viscii_uni.h !Vietnamese (VISCII) +$ makeuctb viscii_uni.tbl +$ define/user sys$output 'CHRwhere'def7_uni.h !7 bit approximations $ makeuctb def7_uni.tbl -$ define/user sys$output 'CHRwhere'iso03_uni.h +$ define/user sys$output 'CHRwhere'iso02_uni.h !ISO Latin 2 +$ makeuctb iso02_uni.tbl +$ define/user sys$output 'CHRwhere'cp852_uni.h !DosLatin2 (cp852) +$ makeuctb cp852_uni.tbl +$ define/user sys$output 'CHRwhere'cp1250_uni.h !WinLatin2 (cp1250) +$ makeuctb cp1250_uni.tbl +$ define/user sys$output 'CHRwhere'iso03_uni.h !ISO Latin 3 $ makeuctb iso03_uni.tbl -$ define/user sys$output 'CHRwhere'iso04_uni.h +$ define/user sys$output 'CHRwhere'iso04_uni.h !ISO Latin 4 $ makeuctb iso04_uni.tbl -$ define/user sys$output 'CHRwhere'iso05_uni.h +$ define/user sys$output 'CHRwhere'iso05_uni.h !ISO Latin 5 Cyrillic $ makeuctb iso05_uni.tbl -$ define/user sys$output 'CHRwhere'iso06_uni.h +$ define/user sys$output 'CHRwhere'cp866_uni.h !DosCyrillic (cp866) +$ makeuctb cp866_uni.tbl +$ define/user sys$output 'CHRwhere'cp1251_uni.h !WinCyrillic (cp1251) +$ makeuctb cp1251_uni.tbl +$ define/user sys$output 'CHRwhere'koi8r_uni.h !KOI8-R Cyrillic +$ makeuctb koi8r_uni.tbl +$ define/user sys$output 'CHRwhere'iso06_uni.h !ISO 8859-6 Arabic $ makeuctb iso06_uni.tbl -$ define/user sys$output 'CHRwhere'iso07_uni.h +$ define/user sys$output 'CHRwhere'cp864_uni.h !DosArabic (cp864) +$ makeuctb cp864_uni.tbl +$ define/user sys$output 'CHRwhere'cp1256_uni.h !WinArabic (cp1256) +$ makeuctb cp1256_uni.tbl +$ define/user sys$output 'CHRwhere'iso07_uni.h !ISO 8859-7 Greek $ makeuctb iso07_uni.tbl -$ define/user sys$output 'CHRwhere'iso08_uni.h +$ define/user 
sys$output 'CHRwhere'cp737_uni.h !DosGreek (cp737) +$ makeuctb cp737_uni.tbl +$ define/user sys$output 'CHRwhere'cp869_uni.h !DosGreek2 (cp869) +$ makeuctb cp869_uni.tbl +$ define/user sys$output 'CHRwhere'cp1253_uni.h !WinGreek (cp1253) +$ makeuctb cp1253_uni.tbl +$ define/user sys$output 'CHRwhere'iso08_uni.h !ISO 8859-8 Hebrew $ makeuctb iso08_uni.tbl -$ define/user sys$output 'CHRwhere'iso09_uni.h +$ define/user sys$output 'CHRwhere'cp862_uni.h !DosHebrew (cp862) +$ makeuctb cp862_uni.tbl +$ define/user sys$output 'CHRwhere'cp1255_uni.h !WinHebrew (cp1255) +$ makeuctb cp1255_uni.tbl +$ define/user sys$output 'CHRwhere'iso09_uni.h !ISO 8859-9 (Latin 5) $ makeuctb iso09_uni.tbl -$ define/user sys$output 'CHRwhere'iso10_uni.h +$ define/user sys$output 'CHRwhere'iso10_uni.h !ISO 8859-10 $ makeuctb iso10_uni.tbl -$ define/user sys$output 'CHRwhere'koi8r_uni.h -$ makeuctb koi8r_uni.tbl -$ define/user sys$output 'CHRwhere'cp437_uni.h -$ makeuctb cp437_uni.tbl -$ define/user sys$output 'CHRwhere'cp850_uni.h -$ makeuctb cp850_uni.tbl -$ define/user sys$output 'CHRwhere'cp852_uni.h -$ makeuctb cp852_uni.tbl -$ define/user sys$output 'CHRwhere'cp866_uni.h -$ makeuctb cp866_uni.tbl -$ define/user sys$output 'CHRwhere'cp1250_uni.h -$ makeuctb cp1250_uni.tbl -$ define/user sys$output 'CHRwhere'cp1251_uni.h -$ makeuctb cp1251_uni.tbl -$ define/user sys$output 'CHRwhere'cp1252_uni.h -$ makeuctb cp1252_uni.tbl -$ define/user sys$output 'CHRwhere'viscii_uni.h -$ makeuctb viscii_uni.tbl -$ define/user sys$output 'CHRwhere'utf8_uni.h +$ define/user sys$output 'CHRwhere'utf8_uni.h !UNICODE UTF 8 $ makeuctb utf8_uni.tbl -$ define/user sys$output 'CHRwhere'mnemonic_suni.h +$ define/user sys$output 'CHRwhere'rfc_suni.h !RFC 1345 w/o Intro +$ makeuctb rfc_suni.tbl +$ define/user sys$output 'CHRwhere'mnemonic_suni.h !RFC 1345 Mnemonic $ makeuctb mnemonic_suni.tbl -$ define/user sys$output 'CHRwhere'mnem_suni.h +$ define/user sys$output 'CHRwhere'mnem_suni.h !(not used) $ makeuctb 
mnem_suni.tbl -$ define/user sys$output 'CHRwhere'rfc_suni.h -$ makeuctb rfc_suni.tbl $ v1 = 'f$verify(0)' $ exit $! diff --git a/src/chrtrans/cp1250_uni.tbl b/src/chrtrans/cp1250_uni.tbl index 75416b67..6f148398 100644 --- a/src/chrtrans/cp1250_uni.tbl +++ b/src/chrtrans/cp1250_uni.tbl @@ -2,7 +2,7 @@ Mwindows-1250 #Name as a Display Charset (used on Options screen) -O MS Windows CP 1250 +OptionName WinLatin2 (cp1250) # # Name: cp1250_WinLatin2 to Unicode table diff --git a/src/chrtrans/cp1252_uni.tbl b/src/chrtrans/cp1252_uni.tbl index e9660295..22fa585f 100644 --- a/src/chrtrans/cp1252_uni.tbl +++ b/src/chrtrans/cp1252_uni.tbl @@ -1,14 +1,12 @@ #Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". +#There has to be exactly one table marked as "default". D0 # #The MIME name of this charset. Miso-8859-1-windows-3.1-latin-1 #Name as a Display Charset (used on Options screen) -O MS Windows CP 1252 +O WinLatin1 (cp1252) # # Name: cp1252_WinLatin1 to Unicode table diff --git a/src/chrtrans/cp1253_uni.tbl b/src/chrtrans/cp1253_uni.tbl new file mode 100644 index 00000000..176ba7e6 --- /dev/null +++ b/src/chrtrans/cp1253_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +MIMEname windows-1253 + +#Name as a Display Charset (used on Options screen) +OWinGreek (cp1253) + +# Name: cp1253_WinGreek to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1253_WinGreek code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1253_WinGreek order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #SINGLE LOW-9 QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER F WITH HOOK +0x84 U+201E #DOUBLE LOW-9 QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +#0x88 #UNDEFINED +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED +0x91 U+2018 #LEFT SINGLE QUOTATION MARK +0x92 U+2019 #RIGHT SINGLE QUOTATION MARK +0x93 U+201C #LEFT DOUBLE QUOTATION MARK +0x94 U+201D #RIGHT DOUBLE QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADE MARK SIGN +#0x9A #UNDEFINED +0x9B U+203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED +0xA0 U+00A0 #NO-BREAK SPACE +0xA1 U+0385 #GREEK DIALYTIKA TONOS +0xA2 U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS +0xA3 U+00A3 #POUND SIGN +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED SIGN +0xAF U+2015 #HORIZONTAL BAR +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT TWO +0xB3 U+00B3 #SUPERSCRIPT THREE +0xB4 U+0384 #GREEK TONOS +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PILCROW SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0xB9 U+0389 
#GREEK CAPITAL LETTER ETA WITH TONOS +0xBA U+038A #GREEK CAPITAL LETTER IOTA WITH TONOS +0xBB U+00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC U+038C #GREEK CAPITAL LETTER OMICRON WITH TONOS +0xBD U+00BD #VULGAR FRACTION ONE HALF +0xBE U+038E #GREEK CAPITAL LETTER UPSILON WITH TONOS +0xBF U+038F #GREEK CAPITAL LETTER OMEGA WITH TONOS +0xC0 U+0390 #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xC1 U+0391 #GREEK CAPITAL LETTER ALPHA +0xC2 U+0392 #GREEK CAPITAL LETTER BETA +0xC3 U+0393 #GREEK CAPITAL LETTER GAMMA +0xC4 U+0394 #GREEK CAPITAL LETTER DELTA +0xC5 U+0395 #GREEK CAPITAL LETTER EPSILON +0xC6 U+0396 #GREEK CAPITAL LETTER ZETA +0xC7 U+0397 #GREEK CAPITAL LETTER ETA +0xC8 U+0398 #GREEK CAPITAL LETTER THETA +0xC9 U+0399 #GREEK CAPITAL LETTER IOTA +0xCA U+039A #GREEK CAPITAL LETTER KAPPA +0xCB U+039B #GREEK CAPITAL LETTER LAMDA +0xCC U+039C #GREEK CAPITAL LETTER MU +0xCD U+039D #GREEK CAPITAL LETTER NU +0xCE U+039E #GREEK CAPITAL LETTER XI +0xCF U+039F #GREEK CAPITAL LETTER OMICRON +0xD0 U+03A0 #GREEK CAPITAL LETTER PI +0xD1 U+03A1 #GREEK CAPITAL LETTER RHO +#0xD2 #UNDEFINED +0xD3 U+03A3 #GREEK CAPITAL LETTER SIGMA +0xD4 U+03A4 #GREEK CAPITAL LETTER TAU +0xD5 U+03A5 #GREEK CAPITAL LETTER UPSILON +0xD6 U+03A6 #GREEK CAPITAL LETTER PHI +0xD7 U+03A7 #GREEK CAPITAL LETTER CHI +0xD8 U+03A8 #GREEK CAPITAL LETTER PSI +0xD9 U+03A9 #GREEK CAPITAL LETTER OMEGA +0xDA U+03AA #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xDB U+03AB #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xDC U+03AC #GREEK SMALL LETTER ALPHA WITH TONOS +0xDD U+03AD #GREEK SMALL LETTER EPSILON WITH TONOS +0xDE U+03AE #GREEK SMALL LETTER ETA WITH TONOS +0xDF U+03AF #GREEK SMALL LETTER IOTA WITH TONOS +0xE0 U+03B0 #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xE1 U+03B1 #GREEK SMALL LETTER ALPHA +0xE2 U+03B2 #GREEK SMALL LETTER BETA +0xE3 U+03B3 #GREEK SMALL LETTER GAMMA +0xE4 U+03B4 #GREEK SMALL LETTER DELTA +0xE5 U+03B5 #GREEK SMALL LETTER EPSILON +0xE6 U+03B6 #GREEK SMALL LETTER ZETA 
+0xE7 U+03B7 #GREEK SMALL LETTER ETA +0xE8 U+03B8 #GREEK SMALL LETTER THETA +0xE9 U+03B9 #GREEK SMALL LETTER IOTA +0xEA U+03BA #GREEK SMALL LETTER KAPPA +0xEB U+03BB #GREEK SMALL LETTER LAMDA +0xEC U+03BC #GREEK SMALL LETTER MU +0xED U+03BD #GREEK SMALL LETTER NU +0xEE U+03BE #GREEK SMALL LETTER XI +0xEF U+03BF #GREEK SMALL LETTER OMICRON +0xF0 U+03C0 #GREEK SMALL LETTER PI +0xF1 U+03C1 #GREEK SMALL LETTER RHO +0xF2 U+03C2 #GREEK SMALL LETTER FINAL SIGMA +0xF3 U+03C3 #GREEK SMALL LETTER SIGMA +0xF4 U+03C4 #GREEK SMALL LETTER TAU +0xF5 U+03C5 #GREEK SMALL LETTER UPSILON +0xF6 U+03C6 #GREEK SMALL LETTER PHI +0xF7 U+03C7 #GREEK SMALL LETTER CHI +0xF8 U+03C8 #GREEK SMALL LETTER PSI +0xF9 U+03C9 #GREEK SMALL LETTER OMEGA +0xFA U+03CA #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xFB U+03CB #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xFC U+03CC #GREEK SMALL LETTER OMICRON WITH TONOS +0xFD U+03CD #GREEK SMALL LETTER UPSILON WITH TONOS +0xFE U+03CE #GREEK SMALL LETTER OMEGA WITH TONOS +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp1255_uni.tbl b/src/chrtrans/cp1255_uni.tbl new file mode 100644 index 00000000..eb446da8 --- /dev/null +++ b/src/chrtrans/cp1255_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +MIMEname windows-1255 + +#Name as a Display Charset (used on Options screen). +OWinHebrew (cp1255) + +# Name: cp1255_WinHebrew to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1255_WinHebrew code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1255_WinHebrew order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +#0x8C #UNDEFINED +#0x8D #UNDEFINED +#0x8E #UNDEFINED +#0x8F #UNDEFINED +#0x90 #UNDEFINED +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +0x98 U+02DC #SPACING TILDE +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B U+203A #RIGHT POINTING SINGLE GUILLEMET +#0x9C #UNDEFINED +#0x9D #UNDEFINED +#0x9E #UNDEFINED +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE +#0xA1 #UNDEFINED +0xA2 U+00A2 #CENT SIGN +0xA3 U+00A3 #POUND SIGN +0xA4 U+20AA #NEW SHEQEL SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #SPACING DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING MACRON +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PARAGRAPH SIGN +0xB7 U+00B7 #MIDDLE DOT +#0xB8 #UNDEFINED +0xB9 U+00B9 #SUPERSCRIPT DIGIT 
ONE +#0xBA #UNDEFINED +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS +#0xBF #UNDEFINED +0xC0 U+05B0 #HEBREW POINT SHEVA +0xC1 U+05B1 #HEBREW POINT HATAF SEGOL +0xC2 U+05B2 #HEBREW POINT HATAF PATAH +0xC3 U+05B3 #HEBREW POINT HATAF QAMATS +0xC4 U+05B4 #HEBREW POINT HIRIQ +0xC5 U+05B5 #HEBREW POINT TSERE +0xC6 U+05B6 #HEBREW POINT SEGOL +0xC7 U+05B7 #HEBREW POINT PATAH +0xC8 U+05B8 #HEBREW POINT QAMATS +0xC9 U+05B9 #HEBREW POINT HOLAM +0xCA U+05BA #HEBREW POINT +0xCB U+05BB #HEBREW POINT QUBUTS +0xCC U+05BC #HEBREW POINT DAGESH +0xCD U+05BD #HEBREW POINT METEG +0xCE U+05BE #HEBREW PUNCTUATION MAQAF +0xCF U+05BF #HEBREW POINT RAFE +0xD0 U+05C0 #HEBREW POINT PASEQ +0xD1 U+05C1 #HEBREW POINT SHIN DOT +0xD2 U+05C2 #HEBREW POINT SIN DOT +0xD3 U+05C3 #HEBREW PUNCTUATION SOF PASUQ +0xD4 U+05F0 #HEBREW LETTER DOUBLE VAV +0xD5 U+05F1 #HEBREW LETTER VAV YOD +0xD6 U+05F2 #HEBREW LETTER DOUBLE YOD +#0xD7 #UNDEFINED +#0xD8 #UNDEFINED +#0xD9 #UNDEFINED +#0xDA #UNDEFINED +#0xDB #UNDEFINED +#0xDC #UNDEFINED +#0xDD #UNDEFINED +#0xDE #UNDEFINED +#0xDF #UNDEFINED +0xE0 U+05D0 #HEBREW LETTER ALEF +0xE1 U+05D1 #HEBREW LETTER BET +0xE2 U+05D2 #HEBREW LETTER GIMEL +0xE3 U+05D3 #HEBREW LETTER DALET +0xE4 U+05D4 #HEBREW LETTER HE +0xE5 U+05D5 #HEBREW LETTER VAV +0xE6 U+05D6 #HEBREW LETTER ZAYIN +0xE7 U+05D7 #HEBREW LETTER HET +0xE8 U+05D8 #HEBREW LETTER TET +0xE9 U+05D9 #HEBREW LETTER YOD +0xEA U+05DA #HEBREW LETTER FINAL KAF +0xEB U+05DB #HEBREW LETTER KAF +0xEC U+05DC #HEBREW LETTER LAMED +0xED U+05DD #HEBREW LETTER FINAL MEM +0xEE U+05DE #HEBREW LETTER MEM +0xEF U+05DF #HEBREW LETTER FINAL NUN +0xF0 U+05E0 #HEBREW LETTER NUN +0xF1 U+05E1 #HEBREW LETTER SAMEKH +0xF2 U+05E2 #HEBREW LETTER AYIN +0xF3 U+05E3 #HEBREW LETTER FINAL PE +0xF4 U+05E4 #HEBREW LETTER PE +0xF5 U+05E5 #HEBREW LETTER FINAL TSADI +0xF6 U+05E6 #HEBREW LETTER TSADI +0xF7 U+05E7 #HEBREW LETTER QOF +0xF8 U+05E8 #HEBREW LETTER 
RESH +0xF9 U+05E9 #HEBREW LETTER SHIN +0xFA U+05EA #HEBREW LETTER TAV +#0xFB #UNDEFINED +#0xFC #UNDEFINED +0xFD U+200E #LEFT-TO-RIGHT MARK +0xFE U+200F #RIGHT-TO-LEFT MARK +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp1256_uni.tbl b/src/chrtrans/cp1256_uni.tbl new file mode 100644 index 00000000..900c72c3 --- /dev/null +++ b/src/chrtrans/cp1256_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +MIMEname windows-1256 + +#Name as a Display Charset (used on Options screen). +OWinArabic (cp1256) + +# Name: cp1256_WinArabic to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1256_WinArabic code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1256_WinArabic order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +0x81 U+067E #ARABIC TAA WITH THREE DOTS BELOW +0x82 U+201A #LOW SINGLE COMMA QUOTATION MARK +0x83 U+0192 #LATIN SMALL LETTER SCRIPT F +0x84 U+201E #LOW DOUBLE COMMA QUOTATION MARK +0x85 U+2026 #HORIZONTAL ELLIPSIS +0x86 U+2020 #DAGGER +0x87 U+2021 #DOUBLE DAGGER +0x88 U+02C6 #MODIFIER LETTER CIRCUMFLEX +0x89 U+2030 #PER MILLE SIGN +#0x8A #UNDEFINED +0x8B U+2039 #LEFT POINTING SINGLE GUILLEMET +0x8C U+0152 #LATIN CAPITAL LETTER O E +0x8D U+0686 #ARABIC HAA WITH MIDDLE THREE DOTS DOWNWARD +0x8E U+0698 #ARABIC RA WITH THREE DOTS ABOVE +#0x8F #UNDEFINED +0x90 U+06AF #ARABIC GAF +0x91 U+2018 #SINGLE TURNED COMMA QUOTATION MARK +0x92 U+2019 #SINGLE COMMA QUOTATION MARK +0x93 U+201C #DOUBLE TURNED COMMA QUOTATION MARK +0x94 U+201D #DOUBLE COMMA QUOTATION MARK +0x95 U+2022 #BULLET +0x96 U+2013 #EN DASH +0x97 U+2014 #EM DASH +#0x98 #UNDEFINED +0x99 U+2122 #TRADEMARK +#0x9A #UNDEFINED +0x9B 
U+203A #RIGHT POINTING SINGLE GUILLEMET +0x9C U+0153 #LATIN SMALL LETTER O E +0x9D U+200C #ZERO WIDTH NON-JOINER +0x9E U+200D #ZERO WIDTH JOINER +#0x9F #UNDEFINED +0xA0 U+00A0 #NON-BREAKING SPACE +0xA1 U+060C #ARABIC COMMA +0xA2 U+00A2 #CENT SIGN +0xA3 U+00A3 #POUND SIGN +0xA4 U+00A4 #CURRENCY SIGN +0xA5 U+00A5 #YEN SIGN +0xA6 U+00A6 #BROKEN VERTICAL BAR +0xA7 U+00A7 #SECTION SIGN +0xA8 U+00A8 #SPACING DIAERESIS +0xA9 U+00A9 #COPYRIGHT SIGN +#0xAA #UNDEFINED +0xAB U+00AB #LEFT POINTING GUILLEMET +0xAC U+00AC #NOT SIGN +0xAD U+00AD #SOFT HYPHEN +0xAE U+00AE #REGISTERED TRADE MARK SIGN +0xAF U+00AF #SPACING MACRON +0xB0 U+00B0 #DEGREE SIGN +0xB1 U+00B1 #PLUS-OR-MINUS SIGN +0xB2 U+00B2 #SUPERSCRIPT DIGIT TWO +0xB3 U+00B3 #SUPERSCRIPT DIGIT THREE +0xB4 U+00B4 #SPACING ACUTE +0xB5 U+00B5 #MICRO SIGN +0xB6 U+00B6 #PARAGRAPH SIGN +0xB7 U+00B7 #MIDDLE DOT +0xB8 U+00B8 #SPACING CEDILLA +0xB9 U+00B9 #SUPERSCRIPT DIGIT ONE +0xBA U+061B #ARABIC SEMICOLON +0xBB U+00BB #RIGHT POINTING GUILLEMET +0xBC U+00BC #FRACTION ONE QUARTER +0xBD U+00BD #FRACTION ONE HALF +0xBE U+00BE #FRACTION THREE QUARTERS +0xBF U+061F #ARABIC QUESTION MARK +#0xC0 #UNDEFINED +0xC1 U+0621 #ARABIC LETTER HAMZAH +0xC2 U+0622 #ARABIC LETTER MADDAH ON ALEF +0xC3 U+0623 #ARABIC LETTER HAMZAH ON ALEF +0xC4 U+0624 #ARABIC LETTER HAMZAH ON WAW +0xC5 U+0625 #ARABIC LETTER HAMZAH UNDER ALEF +0xC6 U+0626 #ARABIC LETTER HAMZAH ON YA +0xC7 U+0627 #ARABIC LETTER ALEF +0xC8 U+0628 #ARABIC LETTER BAA +0xC9 U+0629 #ARABIC LETTER TAA MARBUTAH +0xCA U+062A #ARABIC LETTER TAA +0xCB U+062B #ARABIC LETTER THAA +0xCC U+062C #ARABIC LETTER JEEM +0xCD U+062D #ARABIC LETTER HAA +0xCE U+062E #ARABIC LETTER KHAA +0xCF U+062F #ARABIC LETTER DAL +0xD0 U+0630 #ARABIC LETTER THAL +0xD1 U+0631 #ARABIC LETTER RA +0xD2 U+0632 #ARABIC LETTER ZAIN +0xD3 U+0633 #ARABIC LETTER SEEN +0xD4 U+0634 #ARABIC LETTER SHEEN +0xD5 U+0635 #ARABIC LETTER SAD +0xD6 U+0636 #ARABIC LETTER DAD +0xD7 U+00D7 #MULTIPLICATION SIGN +0xD8 U+0637 #ARABIC LETTER TAH 
+0xD9 U+0638 #ARABIC LETTER DHAH +0xDA U+0639 #ARABIC LETTER AIN +0xDB U+063A #ARABIC LETTER GHAIN +0xDC U+0640 #ARABIC TATWEEL +0xDD U+0641 #ARABIC LETTER FA +0xDE U+0642 #ARABIC LETTER QAF +0xDF U+0643 #ARABIC LETTER CAF +0xE0 U+00E0 #LATIN SMALL LETTER A GRAVE +0xE1 U+0644 #ARABIC LETTER LAM +0xE2 U+00E2 #LATIN SMALL LETTER A CIRCUMFLEX +0xE3 U+0645 #ARABIC LETTER MEEM +0xE4 U+0646 #ARABIC LETTER NOON +0xE5 U+0647 #ARABIC LETTER HA +0xE6 U+0648 #ARABIC LETTER WAW +0xE7 U+00E7 #LATIN SMALL LETTER C CEDILLA +0xE8 U+00E8 #LATIN SMALL LETTER E GRAVE +0xE9 U+00E9 #LATIN SMALL LETTER E ACUTE +0xEA U+00EA #LATIN SMALL LETTER E CIRCUMFLEX +0xEB U+00EB #LATIN SMALL LETTER E DIAERESIS +0xEC U+0649 #ARABIC LETTER ALEF MAQSURAH +0xED U+064A #ARABIC LETTER YA +0xEE U+00EE #LATIN SMALL LETTER I CIRCUMFLEX +0xEF U+00EF #LATIN SMALL LETTER I DIAERESIS +0xF0 U+064B #ARABIC FATHATAN +0xF1 U+064C #ARABIC DAMMATAN +0xF2 U+064D #ARABIC KASRATAN +0xF3 U+064E #ARABIC FATHAH +0xF4 U+00F4 #LATIN SMALL LETTER O CIRCUMFLEX +0xF5 U+064F #ARABIC DAMMAH +0xF6 U+0650 #ARABIC KASRAH +0xF7 U+00F7 #DIVISION SIGN +0xF8 U+0651 #ARABIC SHADDAH +0xF9 U+00F9 #LATIN SMALL LETTER U GRAVE +0xFA U+0652 #ARABIC SUKUN +0xFB U+00FB #LATIN SMALL LETTER U CIRCUMFLEX +0xFC U+00FC #LATIN SMALL LETTER U DIAERESIS +0xFD U+200E #LEFT-TO-RIGHT MARK +0xFE U+200F #RIGHT-TO-LEFT MARK +#0xFF #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl index 6bfbab22..56c937a5 100644 --- a/src/chrtrans/cp437_uni.tbl +++ b/src/chrtrans/cp437_uni.tbl @@ -18,7 +18,7 @@ OIBM PC character set # General notes: none # # Format: Three tab-separated columns -# Column #1 is the cp1255_WinHebrew code (in hex) +# Column #1 is the cp437 code (in hex) # Column #2 is the Unicode (in hex as U+XXXX) # Column #3 is the Unicode name (follows a comment sign, '#') # diff --git a/src/chrtrans/cp737_uni.tbl b/src/chrtrans/cp737_uni.tbl new file mode 100644 index 00000000..b1d44adf --- 
/dev/null +++ b/src/chrtrans/cp737_uni.tbl @@ -0,0 +1,158 @@ +#The MIME name of this charset. +Mcp737 + +#Name as a Display Charset (used on Options screen) +ODosGreek (cp737) + +# +# Name: cp737_DOSGreek to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp737_DOSGreek code (in hex) +# Column #2 is the Unicode (in hex as U+XXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp737_DOSGreek order +# +################## + +0x20-0x7f idem +# +0x80 U+0391 #GREEK CAPITAL LETTER ALPHA +0x81 U+0392 #GREEK CAPITAL LETTER BETA +0x82 U+0393 #GREEK CAPITAL LETTER GAMMA +0x83 U+0394 #GREEK CAPITAL LETTER DELTA +0x84 U+0395 #GREEK CAPITAL LETTER EPSILON +0x85 U+0396 #GREEK CAPITAL LETTER ZETA +0x86 U+0397 #GREEK CAPITAL LETTER ETA +0x87 U+0398 #GREEK CAPITAL LETTER THETA +0x88 U+0399 #GREEK CAPITAL LETTER IOTA +0x89 U+039a #GREEK CAPITAL LETTER KAPPA +0x8a U+039b #GREEK CAPITAL LETTER LAMDA +0x8b U+039c #GREEK CAPITAL LETTER MU +0x8c U+039d #GREEK CAPITAL LETTER NU +0x8d U+039e #GREEK CAPITAL LETTER XI +0x8e U+039f #GREEK CAPITAL LETTER OMICRON +0x8f U+03a0 #GREEK CAPITAL LETTER PI +0x90 U+03a1 #GREEK CAPITAL LETTER RHO +0x91 U+03a3 #GREEK CAPITAL LETTER SIGMA +0x92 U+03a4 #GREEK CAPITAL LETTER TAU +0x93 U+03a5 #GREEK CAPITAL LETTER UPSILON +0x94 U+03a6 #GREEK CAPITAL LETTER PHI +0x95 U+03a7 #GREEK CAPITAL LETTER CHI +0x96 U+03a8 #GREEK CAPITAL LETTER PSI +0x97 U+03a9 #GREEK CAPITAL LETTER OMEGA +0x98 U+03b1 #GREEK SMALL LETTER ALPHA +0x99 U+03b2 #GREEK SMALL LETTER BETA +0x9a U+03b3 #GREEK SMALL LETTER GAMMA +0x9b U+03b4 #GREEK SMALL LETTER DELTA +0x9c U+03b5 #GREEK SMALL LETTER EPSILON +0x9d U+03b6 #GREEK SMALL LETTER ZETA +0x9e U+03b7 #GREEK SMALL LETTER ETA +0x9f U+03b8 #GREEK SMALL LETTER THETA +0xa0 
U+03b9 #GREEK SMALL LETTER IOTA +0xa1 U+03ba #GREEK SMALL LETTER KAPPA +0xa2 U+03bb #GREEK SMALL LETTER LAMDA +0xa3 U+03bc #GREEK SMALL LETTER MU +0xa4 U+03bd #GREEK SMALL LETTER NU +0xa5 U+03be #GREEK SMALL LETTER XI +0xa6 U+03bf #GREEK SMALL LETTER OMICRON +0xa7 U+03c0 #GREEK SMALL LETTER PI +0xa8 U+03c1 #GREEK SMALL LETTER RHO +0xa9 U+03c3 #GREEK SMALL LETTER SIGMA +0xaa U+03c2 #GREEK SMALL LETTER FINAL SIGMA +0xab U+03c4 #GREEK SMALL LETTER TAU +0xac U+03c5 #GREEK SMALL LETTER UPSILON +0xad U+03c6 #GREEK SMALL LETTER PHI +0xae U+03c7 #GREEK SMALL LETTER CHI +0xaf U+03c8 #GREEK SMALL LETTER PSI +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 U+2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 U+2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 U+2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe U+255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 U+255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS 
DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 U+2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 U+2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 U+2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 U+2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 U+2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 U+2552 #BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xd6 U+2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 U+256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 U+256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+258c #LEFT HALF BLOCK +0xde U+2590 #RIGHT HALF BLOCK +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03c9 #GREEK SMALL LETTER OMEGA +0xe1 U+03ac #GREEK SMALL LETTER ALPHA WITH TONOS +0xe2 U+03ad #GREEK SMALL LETTER EPSILON WITH TONOS +0xe3 U+03ae #GREEK SMALL LETTER ETA WITH TONOS +0xe4 U+03ca #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xe5 U+03af #GREEK SMALL LETTER IOTA WITH TONOS +0xe6 U+03cc #GREEK SMALL LETTER OMICRON WITH TONOS +0xe7 U+03cd #GREEK SMALL LETTER UPSILON WITH TONOS +0xe8 U+03cb #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xe9 U+03ce #GREEK SMALL LETTER OMEGA WITH TONOS +0xea U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS +0xeb U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0xec U+0389 #GREEK CAPITAL LETTER ETA WITH TONOS +0xed U+038a #GREEK CAPITAL LETTER IOTA WITH TONOS +0xee U+038c #GREEK CAPITAL LETTER OMICRON WITH TONOS +0xef U+038e #GREEK CAPITAL LETTER UPSILON WITH TONOS +0xf0 U+038f #GREEK CAPITAL LETTER OMEGA WITH TONOS +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+2265 #GREATER-THAN OR EQUAL TO +0xf3 U+2264 #LESS-THAN OR EQUAL TO +0xf4 U+03aa #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xf5 
U+03ab #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+2248 #ALMOST EQUAL TO +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+2219 #BULLET OPERATOR +0xfa U+00b7 #MIDDLE DOT +0xfb U+221a #SQUARE ROOT +0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd U+00b2 #SUPERSCRIPT TWO +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl index 96de277b..759bf950 100644 --- a/src/chrtrans/cp850_uni.tbl +++ b/src/chrtrans/cp850_uni.tbl @@ -1,7 +1,5 @@ #Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". +#There has to be exactly one table marked as "default". D0 # #The MIME name of this charset. diff --git a/src/chrtrans/cp862_uni.tbl b/src/chrtrans/cp862_uni.tbl new file mode 100644 index 00000000..f1a7dd02 --- /dev/null +++ b/src/chrtrans/cp862_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp862 + +#Name as a Display Charset (used on Options screen). +ODosHebrew (cp862) + +# Name: cp862_DOSHebrew to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. 
Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp862_DOSHebrew code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp862_DOSHebrew order +# +################## + +0x20-0x7f idem +# +0x80 U+05d0 #HEBREW LETTER ALEF +0x81 U+05d1 #HEBREW LETTER BET +0x82 U+05d2 #HEBREW LETTER GIMEL +0x83 U+05d3 #HEBREW LETTER DALET +0x84 U+05d4 #HEBREW LETTER HE +0x85 U+05d5 #HEBREW LETTER VAV +0x86 U+05d6 #HEBREW LETTER ZAYIN +0x87 U+05d7 #HEBREW LETTER HET +0x88 U+05d8 #HEBREW LETTER TET +0x89 U+05d9 #HEBREW LETTER YOD +0x8a U+05da #HEBREW LETTER FINAL KAF +0x8b U+05db #HEBREW LETTER KAF +0x8c U+05dc #HEBREW LETTER LAMED +0x8d U+05dd #HEBREW LETTER FINAL MEM +0x8e U+05de #HEBREW LETTER MEM +0x8f U+05df #HEBREW LETTER FINAL NUN +0x90 U+05e0 #HEBREW LETTER NUN +0x91 U+05e1 #HEBREW LETTER SAMEKH +0x92 U+05e2 #HEBREW LETTER AYIN +0x93 U+05e3 #HEBREW LETTER FINAL PE +0x94 U+05e4 #HEBREW LETTER PE +0x95 U+05e5 #HEBREW LETTER FINAL TSADI +0x96 U+05e6 #HEBREW LETTER TSADI +0x97 U+05e7 #HEBREW LETTER QOF +0x98 U+05e8 #HEBREW LETTER RESH +0x99 U+05e9 #HEBREW LETTER SHIN +0x9a U+05ea #HEBREW LETTER TAV +0x9b U+00a2 #CENT SIGN +0x9c U+00a3 #POUND SIGN +0x9d U+00a5 #YEN SIGN +0x9e U+20a7 #PESETA SIGN +0x9f U+0192 #LATIN SMALL LETTER F WITH HOOK +0xa0 U+00e1 #LATIN SMALL LETTER A WITH ACUTE +0xa1 U+00ed #LATIN SMALL LETTER I WITH ACUTE +0xa2 U+00f3 #LATIN SMALL LETTER O WITH ACUTE +0xa3 U+00fa #LATIN SMALL LETTER U WITH ACUTE +0xa4 U+00f1 #LATIN SMALL LETTER N WITH TILDE +0xa5 U+00d1 #LATIN CAPITAL LETTER N WITH TILDE +0xa6 U+00aa #FEMININE ORDINAL INDICATOR +0xa7 U+00ba #MASCULINE ORDINAL INDICATOR +0xa8 U+00bf #INVERTED QUESTION MARK +0xa9 U+2310 #REVERSED NOT SIGN +0xaa U+00ac #NOT SIGN +0xab U+00bd #VULGAR FRACTION ONE HALF +0xac U+00bc #VULGAR FRACTION ONE QUARTER +0xad U+00a1 #INVERTED EXCLAMATION MARK +0xae U+00ab 
#LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+2561 #BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xb6 U+2562 #BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xb7 U+2556 #BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xb8 U+2555 #BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+255c #BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xbe U+255b #BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+255e #BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xc7 U+255f #BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+2567 #BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xd0 U+2568 #BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xd1 U+2564 #BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xd2 U+2565 #BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xd3 U+2559 #BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xd4 U+2558 #BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xd5 U+2552 #BOX DRAWINGS 
DOWN SINGLE AND RIGHT DOUBLE +0xd6 U+2553 #BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xd7 U+256b #BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xd8 U+256a #BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+258c #LEFT HALF BLOCK +0xde U+2590 #RIGHT HALF BLOCK +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03b1 #GREEK SMALL LETTER ALPHA +0xe1 U+00df #LATIN SMALL LETTER SHARP S (GERMAN) +0xe2 U+0393 #GREEK CAPITAL LETTER GAMMA +0xe3 U+03c0 #GREEK SMALL LETTER PI +0xe4 U+03a3 #GREEK CAPITAL LETTER SIGMA +0xe5 U+03c3 #GREEK SMALL LETTER SIGMA +0xe6 U+00b5 #MICRO SIGN +0xe7 U+03c4 #GREEK SMALL LETTER TAU +0xe8 U+03a6 #GREEK CAPITAL LETTER PHI +0xe9 U+0398 #GREEK CAPITAL LETTER THETA +0xea U+03a9 #GREEK CAPITAL LETTER OMEGA +0xeb U+03b4 #GREEK SMALL LETTER DELTA +0xec U+221e #INFINITY +0xed U+03c6 #GREEK SMALL LETTER PHI +0xee U+03b5 #GREEK SMALL LETTER EPSILON +0xef U+2229 #INTERSECTION +0xf0 U+2261 #IDENTICAL TO +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+2265 #GREATER-THAN OR EQUAL TO +0xf3 U+2264 #LESS-THAN OR EQUAL TO +0xf4 U+2320 #TOP HALF INTEGRAL +0xf5 U+2321 #BOTTOM HALF INTEGRAL +0xf6 U+00f7 #DIVISION SIGN +0xf7 U+2248 #ALMOST EQUAL TO +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+2219 #BULLET OPERATOR +0xfa U+00b7 #MIDDLE DOT +0xfb U+221a #SQUARE ROOT +0xfc U+207f #SUPERSCRIPT LATIN SMALL LETTER N +0xfd U+00b2 #SUPERSCRIPT TWO +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp864_uni.tbl b/src/chrtrans/cp864_uni.tbl new file mode 100644 index 00000000..14097a6e --- /dev/null +++ b/src/chrtrans/cp864_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp864 + +#Name as a Display Charset (used on Options screen). 
+ODosArabic (cp864) + +# Name: cp864_DOSArabic to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp864_DOSArabic code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp864_DOSArabic order +# +################## + +0x20-0x7f idem +# +0x80 U+00b0 #DEGREE SIGN +0x81 U+00b7 #MIDDLE DOT +0x82 U+2219 #BULLET OPERATOR +0x83 U+221a #SQUARE ROOT +0x84 U+2592 #MEDIUM SHADE +0x85 U+2500 #FORMS LIGHT HORIZONTAL +0x86 U+2502 #FORMS LIGHT VERTICAL +0x87 U+253c #FORMS LIGHT VERTICAL AND HORIZONTAL +0x88 U+2524 #FORMS LIGHT VERTICAL AND LEFT +0x89 U+252c #FORMS LIGHT DOWN AND HORIZONTAL +0x8a U+251c #FORMS LIGHT VERTICAL AND RIGHT +0x8b U+2534 #FORMS LIGHT UP AND HORIZONTAL +0x8c U+2510 #FORMS LIGHT DOWN AND LEFT +0x8d U+250c #FORMS LIGHT DOWN AND RIGHT +0x8e U+2514 #FORMS LIGHT UP AND RIGHT +0x8f U+2518 #FORMS LIGHT UP AND LEFT +0x90 U+03b2 #GREEK SMALL BETA +0x91 U+221e #INFINITY +0x92 U+03c6 #GREEK SMALL PHI +0x93 U+00b1 #PLUS-OR-MINUS SIGN +0x94 U+00bd #FRACTION 1/2 +0x95 U+00bc #FRACTION 1/4 +0x96 U+2248 #ALMOST EQUAL TO +0x97 U+00ab #LEFT POINTING GUILLEMET +0x98 U+00bb #RIGHT POINTING GUILLEMET +0x99 U+fef7 #ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM +0x9a U+fef8 #ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM +#0x9b #UNDEFINED +#0x9c #UNDEFINED +0x9d U+fefb #ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM +0x9e U+fefc #ARABIC LIGATURE LAM WITH ALEF FINAL FORM +#0x9f #UNDEFINED +0xa0 U+00a0 #NON-BREAKING SPACE +0xa1 U+00ad #SOFT HYPHEN +0xa2 U+fe82 #ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM +0xa3 U+00a3 #POUND SIGN +0xa4 U+00a4 #CURRENCY SIGN +0xa5 U+fe84 #ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM +#0xa6 #UNDEFINED 
+#0xa7 #UNDEFINED +0xa8 U+fe8e #ARABIC LETTER ALEF FINAL FORM +0xa9 U+fe8f #ARABIC LETTER BEH ISOLATED FORM +0xaa U+fe95 #ARABIC LETTER TEH ISOLATED FORM +0xab U+fe99 #ARABIC LETTER THEH ISOLATED FORM +0xac U+060c #ARABIC COMMA +0xad U+fe9d #ARABIC LETTER JEEM ISOLATED FORM +0xae U+fea1 #ARABIC LETTER HAH ISOLATED FORM +0xaf U+fea5 #ARABIC LETTER KHAH ISOLATED FORM +0xb0 U+0660 #ARABIC-INDIC DIGIT ZERO +0xb1 U+0661 #ARABIC-INDIC DIGIT ONE +0xb2 U+0662 #ARABIC-INDIC DIGIT TWO +0xb3 U+0663 #ARABIC-INDIC DIGIT THREE +0xb4 U+0664 #ARABIC-INDIC DIGIT FOUR +0xb5 U+0665 #ARABIC-INDIC DIGIT FIVE +0xb6 U+0666 #ARABIC-INDIC DIGIT SIX +0xb7 U+0667 #ARABIC-INDIC DIGIT SEVEN +0xb8 U+0668 #ARABIC-INDIC DIGIT EIGHT +0xb9 U+0669 #ARABIC-INDIC DIGIT NINE +0xba U+fed1 #ARABIC LETTER FEH ISOLATED FORM +0xbb U+061b #ARABIC SEMICOLON +0xbc U+feb1 #ARABIC LETTER SEEN ISOLATED FORM +0xbd U+feb5 #ARABIC LETTER SHEEN ISOLATED FORM +0xbe U+feb9 #ARABIC LETTER SAD ISOLATED FORM +0xbf U+061f #ARABIC QUESTION MARK +0xc0 U+00a2 #CENT SIGN +0xc1 U+fe80 #ARABIC LETTER HAMZA ISOLATED FORM +0xc2 U+fe81 #ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM +0xc3 U+fe83 #ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM +0xc4 U+fe85 #ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM +0xc5 U+feca #ARABIC LETTER AIN FINAL FORM +0xc6 U+fe8b #ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM +0xc7 U+fe8d #ARABIC LETTER ALEF ISOLATED FORM +0xc8 U+fe91 #ARABIC LETTER BEH INITIAL FORM +0xc9 U+fe93 #ARABIC LETTER TEH MARBUTA ISOLATED FORM +0xca U+fe97 #ARABIC LETTER TEH INITIAL FORM +0xcb U+fe9b #ARABIC LETTER THEH INITIAL FORM +0xcc U+fe9f #ARABIC LETTER JEEM INITIAL FORM +0xcd U+fea3 #ARABIC LETTER HAH INITIAL FORM +0xce U+fea7 #ARABIC LETTER KHAH INITIAL FORM +0xcf U+fea9 #ARABIC LETTER DAL ISOLATED FORM +0xd0 U+feab #ARABIC LETTER THAL ISOLATED FORM +0xd1 U+fead #ARABIC LETTER REH ISOLATED FORM +0xd2 U+feaf #ARABIC LETTER ZAIN ISOLATED FORM +0xd3 U+feb3 #ARABIC LETTER SEEN INITIAL FORM +0xd4 U+feb7 #ARABIC 
LETTER SHEEN INITIAL FORM +0xd5 U+febb #ARABIC LETTER SAD INITIAL FORM +0xd6 U+febf #ARABIC LETTER DAD INITIAL FORM +0xd7 U+fec1 #ARABIC LETTER TAH ISOLATED FORM +0xd8 U+fec5 #ARABIC LETTER ZAH ISOLATED FORM +0xd9 U+fecb #ARABIC LETTER AIN INITIAL FORM +0xda U+fecf #ARABIC LETTER GHAIN INITIAL FORM +0xdb U+00a6 #BROKEN VERTICAL BAR +0xdc U+00ac #NOT SIGN +0xdd U+00f7 #DIVISION SIGN +0xde U+00d7 #MULTIPLICATION SIGN +0xdf U+fec9 #ARABIC LETTER AIN ISOLATED FORM +0xe0 U+0640 #ARABIC TATWEEL +0xe1 U+fed3 #ARABIC LETTER FEH INITIAL FORM +0xe2 U+fed7 #ARABIC LETTER QAF INITIAL FORM +0xe3 U+fedb #ARABIC LETTER KAF INITIAL FORM +0xe4 U+fedf #ARABIC LETTER LAM INITIAL FORM +0xe5 U+fee3 #ARABIC LETTER MEEM INITIAL FORM +0xe6 U+fee7 #ARABIC LETTER NOON INITIAL FORM +0xe7 U+feeb #ARABIC LETTER HEH INITIAL FORM +0xe8 U+feed #ARABIC LETTER WAW ISOLATED FORM +0xe9 U+feef #ARABIC LETTER ALEF MAKSURA ISOLATED FORM +0xea U+fef3 #ARABIC LETTER YEH INITIAL FORM +0xeb U+febd #ARABIC LETTER DAD ISOLATED FORM +0xec U+fecc #ARABIC LETTER AIN MEDIAL FORM +0xed U+fece #ARABIC LETTER GHAIN FINAL FORM +0xee U+fecd #ARABIC LETTER GHAIN ISOLATED FORM +0xef U+fee1 #ARABIC LETTER MEEM ISOLATED FORM +0xf0 U+fe7d #ARABIC SHADDA MEDIAL FORM +0xf1 U+0651 #ARABIC SHADDAH +0xf2 U+fee5 #ARABIC LETTER NOON ISOLATED FORM +0xf3 U+fee9 #ARABIC LETTER HEH ISOLATED FORM +0xf4 U+feec #ARABIC LETTER HEH MEDIAL FORM +0xf5 U+fef0 #ARABIC LETTER ALEF MAKSURA FINAL FORM +0xf6 U+fef2 #ARABIC LETTER YEH FINAL FORM +0xf7 U+fed0 #ARABIC LETTER GHAIN MEDIAL FORM +0xf8 U+fed5 #ARABIC LETTER QAF ISOLATED FORM +0xf9 U+fef5 #ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM +0xfa U+fef6 #ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM +0xfb U+fedd #ARABIC LETTER LAM ISOLATED FORM +0xfc U+fed9 #ARABIC LETTER KAF ISOLATED FORM +0xfd U+fef1 #ARABIC LETTER YEH ISOLATED FORM +0xfe U+25a0 #BLACK SQUARE +#0xff #UNDEFINED + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/cp866_uni.tbl 
b/src/chrtrans/cp866_uni.tbl index 2b109897..9de12f9d 100644 --- a/src/chrtrans/cp866_uni.tbl +++ b/src/chrtrans/cp866_uni.tbl @@ -23,7 +23,7 @@ ODosCyrillic (cp866) # 0x20-0x40 idem -# some mapppings of greek capital letters to latin letters added - kw +# Some mapppings of Greek capital letters to Latin letters added. - KW 0x41 U+0041 U+0391 #LATIN CAPITAL LETTER A 0x42 U+0042 U+0392 #LATIN CAPITAL LETTER B 0x43 U+0043 #LATIN CAPITAL LETTER C diff --git a/src/chrtrans/cp869_uni.tbl b/src/chrtrans/cp869_uni.tbl new file mode 100644 index 00000000..412fb8a7 --- /dev/null +++ b/src/chrtrans/cp869_uni.tbl @@ -0,0 +1,157 @@ +#The MIME name of this charset. +Mcp869 + +#Name as a Display Charset (used on Options screen) +ODosGreek2 (cp869) + +# Name: cp869_DOSGreek2 to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Authors: Lori Brownell <loribr@microsoft.com> +# K.D. Chang <a-kchang@microsoft.com> +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp869_DOSGreek2 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp869_DOSGreek2 order +# +################## + +0x20-0x7f idem +# +#0x80 #UNDEFINED +#0x81 #UNDEFINED +#0x82 #UNDEFINED +#0x83 #UNDEFINED +#0x84 #UNDEFINED +#0x85 #UNDEFINED +0x86 U+0386 #GREEK CAPITAL LETTER ALPHA WITH TONOS +#0x87 #UNDEFINED +0x88 U+00b7 #MIDDLE DOT +0x89 U+00ac #NOT SIGN +0x8a U+00a6 #BROKEN BAR +0x8b U+2018 #LEFT SINGLE QUOTATION MARK +0x8c U+2019 #RIGHT SINGLE QUOTATION MARK +0x8d U+0388 #GREEK CAPITAL LETTER EPSILON WITH TONOS +0x8e U+2015 #HORIZONTAL BAR +0x8f U+0389 #GREEK CAPITAL LETTER ETA WITH TONOS +0x90 U+038a #GREEK CAPITAL LETTER IOTA WITH TONOS +0x91 U+03aa #GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0x92 U+038c #GREEK CAPITAL LETTER OMICRON WITH TONOS +#0x93 #UNDEFINED +#0x94 #UNDEFINED +0x95 U+038e #GREEK CAPITAL LETTER UPSILON WITH TONOS 
+0x96 U+03ab #GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0x97 U+00a9 #COPYRIGHT SIGN +0x98 U+038f #GREEK CAPITAL LETTER OMEGA WITH TONOS +0x99 U+00b2 #SUPERSCRIPT TWO +0x9a U+00b3 #SUPERSCRIPT THREE +0x9b U+03ac #GREEK SMALL LETTER ALPHA WITH TONOS +0x9c U+00a3 #POUND SIGN +0x9d U+03ad #GREEK SMALL LETTER EPSILON WITH TONOS +0x9e U+03ae #GREEK SMALL LETTER ETA WITH TONOS +0x9f U+03af #GREEK SMALL LETTER IOTA WITH TONOS +0xa0 U+03ca #GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xa1 U+0390 #GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xa2 U+03cc #GREEK SMALL LETTER OMICRON WITH TONOS +0xa3 U+03cd #GREEK SMALL LETTER UPSILON WITH TONOS +0xa4 U+0391 #GREEK CAPITAL LETTER ALPHA +0xa5 U+0392 #GREEK CAPITAL LETTER BETA +0xa6 U+0393 #GREEK CAPITAL LETTER GAMMA +0xa7 U+0394 #GREEK CAPITAL LETTER DELTA +0xa8 U+0395 #GREEK CAPITAL LETTER EPSILON +0xa9 U+0396 #GREEK CAPITAL LETTER ZETA +0xaa U+0397 #GREEK CAPITAL LETTER ETA +0xab U+00bd #VULGAR FRACTION ONE HALF +0xac U+0398 #GREEK CAPITAL LETTER THETA +0xad U+0399 #GREEK CAPITAL LETTER IOTA +0xae U+00ab #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xaf U+00bb #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xb0 U+2591 #LIGHT SHADE +0xb1 U+2592 #MEDIUM SHADE +0xb2 U+2593 #DARK SHADE +0xb3 U+2502 #BOX DRAWINGS LIGHT VERTICAL +0xb4 U+2524 #BOX DRAWINGS LIGHT VERTICAL AND LEFT +0xb5 U+039a #GREEK CAPITAL LETTER KAPPA +0xb6 U+039b #GREEK CAPITAL LETTER LAMDA +0xb7 U+039c #GREEK CAPITAL LETTER MU +0xb8 U+039d #GREEK CAPITAL LETTER NU +0xb9 U+2563 #BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xba U+2551 #BOX DRAWINGS DOUBLE VERTICAL +0xbb U+2557 #BOX DRAWINGS DOUBLE DOWN AND LEFT +0xbc U+255d #BOX DRAWINGS DOUBLE UP AND LEFT +0xbd U+039e #GREEK CAPITAL LETTER XI +0xbe U+039f #GREEK CAPITAL LETTER OMICRON +0xbf U+2510 #BOX DRAWINGS LIGHT DOWN AND LEFT +0xc0 U+2514 #BOX DRAWINGS LIGHT UP AND RIGHT +0xc1 U+2534 #BOX DRAWINGS LIGHT UP AND HORIZONTAL +0xc2 U+252c #BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0xc3 U+251c #BOX DRAWINGS LIGHT 
VERTICAL AND RIGHT +0xc4 U+2500 #BOX DRAWINGS LIGHT HORIZONTAL +0xc5 U+253c #BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0xc6 U+03a0 #GREEK CAPITAL LETTER PI +0xc7 U+03a1 #GREEK CAPITAL LETTER RHO +0xc8 U+255a #BOX DRAWINGS DOUBLE UP AND RIGHT +0xc9 U+2554 #BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xca U+2569 #BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xcb U+2566 #BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xcc U+2560 #BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xcd U+2550 #BOX DRAWINGS DOUBLE HORIZONTAL +0xce U+256c #BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xcf U+03a3 #GREEK CAPITAL LETTER SIGMA +0xd0 U+03a4 #GREEK CAPITAL LETTER TAU +0xd1 U+03a5 #GREEK CAPITAL LETTER UPSILON +0xd2 U+03a6 #GREEK CAPITAL LETTER PHI +0xd3 U+03a7 #GREEK CAPITAL LETTER CHI +0xd4 U+03a8 #GREEK CAPITAL LETTER PSI +0xd5 U+03a9 #GREEK CAPITAL LETTER OMEGA +0xd6 U+03b1 #GREEK SMALL LETTER ALPHA +0xd7 U+03b2 #GREEK SMALL LETTER BETA +0xd8 U+03b3 #GREEK SMALL LETTER GAMMA +0xd9 U+2518 #BOX DRAWINGS LIGHT UP AND LEFT +0xda U+250c #BOX DRAWINGS LIGHT DOWN AND RIGHT +0xdb U+2588 #FULL BLOCK +0xdc U+2584 #LOWER HALF BLOCK +0xdd U+03b4 #GREEK SMALL LETTER DELTA +0xde U+03b5 #GREEK SMALL LETTER EPSILON +0xdf U+2580 #UPPER HALF BLOCK +0xe0 U+03b6 #GREEK SMALL LETTER ZETA +0xe1 U+03b7 #GREEK SMALL LETTER ETA +0xe2 U+03b8 #GREEK SMALL LETTER THETA +0xe3 U+03b9 #GREEK SMALL LETTER IOTA +0xe4 U+03ba #GREEK SMALL LETTER KAPPA +0xe5 U+03bb #GREEK SMALL LETTER LAMDA +0xe6 U+03bc #GREEK SMALL LETTER MU +0xe7 U+03bd #GREEK SMALL LETTER NU +0xe8 U+03be #GREEK SMALL LETTER XI +0xe9 U+03bf #GREEK SMALL LETTER OMICRON +0xea U+03c0 #GREEK SMALL LETTER PI +0xeb U+03c1 #GREEK SMALL LETTER RHO +0xec U+03c3 #GREEK SMALL LETTER SIGMA +0xed U+03c2 #GREEK SMALL LETTER FINAL SIGMA +0xee U+03c4 #GREEK SMALL LETTER TAU +0xef U+0384 #GREEK TONOS +0xf0 U+00ad #SOFT HYPHEN +0xf1 U+00b1 #PLUS-MINUS SIGN +0xf2 U+03c5 #GREEK SMALL LETTER UPSILON +0xf3 U+03c6 #GREEK SMALL LETTER PHI +0xf4 U+03c7 #GREEK SMALL LETTER CHI +0xf5 U+00a7 
#SECTION SIGN +0xf6 U+03c8 #GREEK SMALL LETTER PSI +0xf7 U+0385 #GREEK DIALYTIKA TONOS +0xf8 U+00b0 #DEGREE SIGN +0xf9 U+00a8 #DIAERESIS +0xfa U+03c9 #GREEK SMALL LETTER OMEGA +0xfb U+03cb #GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xfc U+03b0 #GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xfd U+03ce #GREEK SMALL LETTER OMEGA WITH TONOS +0xfe U+25a0 #BLACK SQUARE +0xff U+00a0 #NO-BREAK SPACE + +# TRADE MARK SIGN: +U+2122:(TM) diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl index c4bbef7c..42cca597 100644 --- a/src/chrtrans/def7_uni.tbl +++ b/src/chrtrans/def7_uni.tbl @@ -20,24 +20,24 @@ U+00a3:Pd U+00a4:CUR U+00a5:Ye U+00a6:| -U+00a7:SE +U+00a7:S: U+00a8:" -U+00a9:(C) +U+00a9:(c) U+00aa:-a U+00ab:<< -U+00ac:NO +U+00ac:NOT U+00ad:- U+00ae:(R) U+00af:- -U+00b0:DG +U+00b0:DEG U+00b1:+- U+00b2:^2 U+00b3:^3 U+00b4:' # My -> u -U+00b6:u -U+00b6:PI -U+00b7:.M +U+00b5:u +U+00b6:P: +U+00b7:. U+00b8:, U+00b9:^1 U+00ba:-o @@ -1325,9 +1325,9 @@ U+211e:Rx U+2120:(SM) U+2122:(TM) U+2126:Ohm -U+212E:est. 0x4b U+212A # Kelvin sign - K U+212b:Ang. +U+212E:est. U+2135:Aleph U+2153: 1/3 U+2154: 2/3 diff --git a/src/chrtrans/dmcs_uni.tbl b/src/chrtrans/dmcs_uni.tbl new file mode 100644 index 00000000..676b728d --- /dev/null +++ b/src/chrtrans/dmcs_uni.tbl @@ -0,0 +1,226 @@ +#The MIME name of this charset. 
+Mdec-mcs + +#Name as a Display Charset (used on Options screen) +ODEC Multinational + +# +# Name: DEC Multinational (dec-mcs) [to unicode] +# Date: 29 October 1997 +# Author: Fote +# +################## + +#0x20 U+0020 # SPACE +#0x21 U+0021 # EXCLAMATION MARK +#0x22 U+0022 # QUOTATION MARK +#0x23 U+0023 # NUMBER SIGN +#0x24 U+0024 # DOLLAR SIGN +#0x25 U+0025 # PERCENT SIGN +#0x26 U+0026 # AMPERSAND +#0x27 U+0027 # APOSTROPHE +#0x28 U+0028 # LEFT PARENTHESIS +#0x29 U+0029 # RIGHT PARENTHESIS +#0x2A U+002A # ASTERISK +#0x2B U+002B # PLUS SIGN +#0x2C U+002C # COMMA +#0x2D U+002D # HYPHEN-MINUS +#0x2E U+002E # FULL STOP +#0x2F U+002F # SOLIDUS +#0x30 U+0030 # DIGIT ZERO +#0x31 U+0031 # DIGIT ONE +#0x32 U+0032 # DIGIT TWO +#0x33 U+0033 # DIGIT THREE +#0x34 U+0034 # DIGIT FOUR +#0x35 U+0035 # DIGIT FIVE +#0x36 U+0036 # DIGIT SIX +#0x37 U+0037 # DIGIT SEVEN +#0x38 U+0038 # DIGIT EIGHT +#0x39 U+0039 # DIGIT NINE +#0x3A U+003A # COLON +#0x3B U+003B # SEMICOLON +#0x3C U+003C # LESS-THAN SIGN +#0x3D U+003D # EQUALS SIGN +#0x3E U+003E # GREATER-THAN SIGN +#0x3F U+003F # QUESTION MARK +#0x40 U+0040 # COMMERCIAL AT +#0x41 U+0041 # LATIN CAPITAL LETTER A +#0x42 U+0042 # LATIN CAPITAL LETTER B +#0x43 U+0043 # LATIN CAPITAL LETTER C +#0x44 U+0044 # LATIN CAPITAL LETTER D +#0x45 U+0045 # LATIN CAPITAL LETTER E +#0x46 U+0046 # LATIN CAPITAL LETTER F +#0x47 U+0047 # LATIN CAPITAL LETTER G +#0x48 U+0048 # LATIN CAPITAL LETTER H +#0x49 U+0049 # LATIN CAPITAL LETTER I +#0x4A U+004A # LATIN CAPITAL LETTER J +#0x4B U+004B # LATIN CAPITAL LETTER K +#0x4C U+004C # LATIN CAPITAL LETTER L +#0x4D U+004D # LATIN CAPITAL LETTER M +#0x4E U+004E # LATIN CAPITAL LETTER N +#0x4F U+004F # LATIN CAPITAL LETTER O +#0x50 U+0050 # LATIN CAPITAL LETTER P +#0x51 U+0051 # LATIN CAPITAL LETTER Q +#0x52 U+0052 # LATIN CAPITAL LETTER R +#0x53 U+0053 # LATIN CAPITAL LETTER S +#0x54 U+0054 # LATIN CAPITAL LETTER T +#0x55 U+0055 # LATIN CAPITAL LETTER U +#0x56 U+0056 # LATIN CAPITAL LETTER V +#0x57 U+0057 # LATIN 
CAPITAL LETTER W +#0x58 U+0058 # LATIN CAPITAL LETTER X +#0x59 U+0059 # LATIN CAPITAL LETTER Y +#0x5A U+005A # LATIN CAPITAL LETTER Z +#0x5B U+005B # LEFT SQUARE BRACKET +#0x5C U+005C # REVERSE SOLIDUS +#0x5D U+005D # RIGHT SQUARE BRACKET +#0x5E U+005E # CIRCUMFLEX ACCENT +#0x5F U+005F # LOW LINE +#0x60 U+0060 # GRAVE ACCENT +#0x61 U+0061 # LATIN SMALL LETTER A +#0x62 U+0062 # LATIN SMALL LETTER B +#0x63 U+0063 # LATIN SMALL LETTER C +#0x64 U+0064 # LATIN SMALL LETTER D +#0x65 U+0065 # LATIN SMALL LETTER E +#0x66 U+0066 # LATIN SMALL LETTER F +#0x67 U+0067 # LATIN SMALL LETTER G +#0x68 U+0068 # LATIN SMALL LETTER H +#0x69 U+0069 # LATIN SMALL LETTER I +#0x6A U+006A # LATIN SMALL LETTER J +#0x6B U+006B # LATIN SMALL LETTER K +#0x6C U+006C # LATIN SMALL LETTER L +#0x6D U+006D # LATIN SMALL LETTER M +#0x6E U+006E # LATIN SMALL LETTER N +#0x6F U+006F # LATIN SMALL LETTER O +#0x70 U+0070 # LATIN SMALL LETTER P +#0x71 U+0071 # LATIN SMALL LETTER Q +#0x72 U+0072 # LATIN SMALL LETTER R +#0x73 U+0073 # LATIN SMALL LETTER S +#0x74 U+0074 # LATIN SMALL LETTER T +#0x75 U+0075 # LATIN SMALL LETTER U +#0x76 U+0076 # LATIN SMALL LETTER V +#0x77 U+0077 # LATIN SMALL LETTER W +#0x78 U+0078 # LATIN SMALL LETTER X +#0x79 U+0079 # LATIN SMALL LETTER Y +#0x7A U+007A # LATIN SMALL LETTER Z +#0x7B U+007B # LEFT CURLY BRACKET +#0x7C U+007C # VERTICAL LINE +#0x7D U+007D # RIGHT CURLY BRACKET +#0x7E U+007E # TILDE +# +0x20-0x7f idem +# +0xA1 U+00A1 # inverted exclamation mark (¡) - iexcl +0xA2 U+00A2 # cent sign (¢) - cent +0xA3 U+00A3 # pound sign (£) - pound +# currency sign (¤) - curren +U+00A4:CUR +0xA5 U+00A5 # yen sign (¥) - yen +# broken vertical bar (¦) - brvbar, brkbar +U+00A6:| +0xA7 U+00A7 # section sign (§) - sect +0xA8 U+00A8 # spacing diaresis (¨) - uml, die +0xA9 U+00A9 # copyright sign (©) - copy +0xAA U+00AA # feminine ordinal indicator (ª) - ordf +0xAB U+00AB # angle quotation mark, left («) - laquo +# negation sign (¬); - not +U+00AC:NOT +# soft hyphen (­) - shy +#U+00AD 
+# circled R registered sign (®) - reg +U+00AE:(R) +# spacing macron (¯) - hibar, macr +U+00AF:- +0xB0 U+00B0 # degree sign (°) - deg +0xB1 U+00B1 # plus-or-minus sign (±) - plusmn +0xB2 U+00B2 # superscript 2 (²) - sup2 +0xB3 U+00B3 # superscript 3 (³) - sup3 +#spacing acute (´) - acute +U+00B4:' +0xB5 U+00B5 # micro sign (µ) - micro +0xB6 U+00B6 # paragraph sign (¶) - para +0xB7 U+00B7 # middle dot (·) - middot +# spacing cedilla (¸) - cedil +U+00B8:, +0xB9 U+00B9 # superscript 1 (¹) - sup1 +0xBA U+00BA # masculine ordinal indicator (º) - ordm +0xBB U+00BB # angle quotation mark, right (») - raquo +0xBC U+00BC # fraction 1/4 (¼) - frac14 +0xBD U+00BD # fraction 1/2 (½) - frac12 +# fraction 3/4 (¾) - frac34 +U+00BE: 3/4 +0xBF U+00BF # inverted question mark (¿) - iquest +0xC0 U+00C0 # capital A, grave accent (À) - Agrave +0xC1 U+00C1 # capital A, acute accent (Á) - Aacute +0xC2 U+00C2 # capital A, circumflex accent (Â) - Acirc +0xC3 U+00C3 # capital A, tilde (Ã) - Atilde +0xC4 U+00C4 # capital A, dieresis or umlaut mark (Ä) - Auml +0xC5 U+00C5 # capital A, ring (Å) - Aring +0xC6 U+00C6 # capital AE diphthong (ligature) (Æ) - AElig +0xC7 U+00C7 # capital C, cedilla (Ç) - Ccedil +0xC8 U+00C8 # capital E, grave accent (È) - Egrave +0xC9 U+00C9 # capital E, acute accent (É) - Eacute +0xCA U+00CA # capital E, circumflex accent (Ê) - Ecirc +0xCB U+00CB # capital E, dieresis or umlaut mark (Ë) - Euml +0xCC U+00CC # capital I, grave accent (Ì) - Igrave +0xCD U+00CD # capital I, acute accent (Í) - Iacute +0xCE U+00CE # capital I, circumflex accent (Î) - Icirc +0xCF U+00CF # capital I, dieresis or umlaut mark (Ï) - Iuml +# capital Eth, Icelandic (Ð) - ETH */ +U+00D0:DH +# Dj # capital D with stroke - Dstrok +0xD1 U+00D1 # capital N, tilde (Ñ) - Ntilde +0xD2 U+00D2 # capital O, grave accent (Ò) - Ograve +0xD3 U+00D3 # capital O, acute accent (Ó) - Oacute +0xD4 U+00D4 # capital O, circumflex accent (Ô) - Ocirc +0xD5 U+00D5 # capital O, tilde (Õ) - Otilde +0xD6 U+00D6 # 
capital O, dieresis or umlaut mark (Ö) - Ouml +# multiplication sign (×) - times +U+00D7:* +0xD8 U+00D8 # capital O, slash (Ø) - Oslash +0xD9 U+00D9 # capital U, grave accent (Ù) - Ugrave +0xDA U+00DA # capital U, acute accent (Ú) - Uacute +0xDB U+00DB # capital U, circumflex accent (Û) - Ucirc +0xDC U+00DC # capital U, dieresis or umlaut mark (Ü) - Uuml +0xDD U+00DD # capital Y, acute accent (Ý) - Yacute +# capital THORN, Icelandic (Þ) - THORN */ +U+00DE:P +0xDF U+00DF # small sharp s, German (sz ligature) (ß) - szlig +0xE0 U+00E0 # small a, grave accent (à) - agrave +0xE1 U+00E1 # small a, acute accent (á) - aacute +0xE2 U+00E2 # small a, circumflex accent (â) - acirc +0xE3 U+00E3 # small a, tilde (ã) - atilde +0xE4 U+00E4 # small a, dieresis or umlaut mark (ä) - auml +0xE5 U+00E5 # small a, ring (å) - aring +0xE6 U+00E6 # small ae diphthong (ligature) (æ) - aelig +0xE7 U+00E7 # small c, cedilla (ç) - ccedil +0xE8 U+00E8 # small e, grave accent (è) - egrave +0xE9 U+00E9 # small e, acute accent (é) - eacute +0xEA U+00EA # small e, circumflex accent (ê) - ecirc +0xEB U+00EB # small e, dieresis or umlaut mark (ë) - euml +0xEC U+00EC # small i, grave accent (ì) - igrave +0xED U+00ED # small i, acute accent (í) - iacute +0xEE U+00EE # small i, circumflex accent (î) - icirc +0xEF U+00EF # small i, dieresis or umlaut mark (ï) - iuml +# small eth, Icelandic (ð) - eth +U+00F0:dh +0xF1 U+00F1 # small n, tilde (ñ) - ntilde +0xF2 U+00F2 # small o, grave accent (ò) - ograve +0xF3 U+00F3 # small o, acute accent (ó) - oacute +0xF4 U+00F4 # small o, circumflex accent (ô) - ocirc +0xF5 U+00F5 # small o, tilde (õ) - otilde +0xF6 U+00F6 # small o, dieresis or umlaut mark (ö) - ouml +# division sign (÷) - divide +U+00F7:/ +0xF8 U+00F8 # small o, slash (ø) - oslash +0xF9 U+00F9 # small u, grave accent (ù) - ugrave +0xFA U+00FA # small u, acute accent (ú) - uacute +0xFB U+00FB # small u, circumflex accent (û) - ucirc +0xFC U+00FC # small u, dieresis or umlaut mark (ü) - uuml +0xFD 
U+00FF # small y, dieresis or umlaut mark (ÿ) - yuml +# small y, acute accent (ý) - yacute +U+00FD:y' +# small thorn, Icelandic (þ) - thorn +U+00FE:p +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/iso01_uni.tbl b/src/chrtrans/iso01_uni.tbl index d2147771..d2701f0e 100644 --- a/src/chrtrans/iso01_uni.tbl +++ b/src/chrtrans/iso01_uni.tbl @@ -75,4 +75,4 @@ U+2122:(TM) 0x27 U+2019-U+201b # various single quotation marks 0x22 U+201c-U+201f # various double quotation marks -U+2297 "(\327)" \ No newline at end of file +U+2297 "(\327)" diff --git a/src/chrtrans/iso01_uni.tbl.orig b/src/chrtrans/iso01_uni.tbl.orig deleted file mode 100644 index 14f71ff3..00000000 --- a/src/chrtrans/iso01_uni.tbl.orig +++ /dev/null @@ -1,78 +0,0 @@ -# -# Unicode mapping table for ISO 8859-1 fonts iso01.* -# [use: unicode_start iso01.f16 iso01] -# -#Shall this become the "default" translation? -#Meaning of that is currently not well defined. It is different -#from the default input or default output charset... -#but there has to be exactly one table marked as "default". -D0 -# -#The MIME name of this charset. 
-Miso-8859-1 - -#Name as a Display Charset (used on Options screen) -OISO Latin 1 - -0x20 U+0020 U+1360 -0x21-0x62 idem -# The following line is an example for mapping several accented versions -# of small letter 'c' to 'c': -0x63 U+0063 U+0107 U+0109 U+010B U+010D -0x64-0x7e idem -0xa0-0xff idem -#0x00 U+fffd # don't let failed char lookups return '\0' -# Mappings of C0 control chars from original, disabled -#0x01 U+263A -#0x02 U+263B -#0x03 U+2665 -#0x04 U+2666 -#0x05 U+2663 -#0x06 U+2660 -#0x07 U+2022 -#0x08 U+25D8 -#0x09 U+25CB -#0x0A U+25D9 -#0x0B U+2642 -#0x0C U+2640 -#0x0D U+266A -#0x0E U+266B -#0x0E U+266C -#0x0F U+263C -#0x10 U+25B6 -#0x10 U+25BA -#0x11 U+25C0 -#0x11 U+25C4 -#0x12 U+2195 -#0x13 U+203C -#0x14 U+00B6 -#0x15 U+00A7 -#0x16 U+25AC -#0x17 U+21A8 -#0x18 U+2191 -#0x19 U+2193 -#0x1A U+2192 -#0x1B U+2190 -#0x1C U+221F -#0x1C U+2319 -#0x1D U+2194 -#0x1E U+25B2 -#0x1F U+25BC -#0x7f U+2302 - -0xd0 U+0110 # Dstrok and ETH are nearly the same... - -# Dont wanna see these: -# POP DIRECTIONAL FORMATTING 202C -U+202c: -# LEFT-TO-RIGHT OVERRIDE 202D -U+202d: - -# TRADE MARK SIGN: -U+2122:(TM) - -0x60 U+2018 # left single quotation mark -0x27 U+2019-U+201b # various single quotation marks -0x22 U+201c-U+201f # various double quotation marks - -U+2297:(×) \ No newline at end of file diff --git a/src/chrtrans/iso06_uni.tbl b/src/chrtrans/iso06_uni.tbl index 46eb3709..fd3452da 100644 --- a/src/chrtrans/iso06_uni.tbl +++ b/src/chrtrans/iso06_uni.tbl @@ -109,4 +109,5 @@ U+2122:(TM) # Let's try to show a question mark for character that cannot # be shown. U+fffd is used for invalid characters. -U+fffd:? +# It works, but let's stick with UHHH representatiion. - FM +#U+fffd:? diff --git a/src/chrtrans/iso08_uni.tbl b/src/chrtrans/iso08_uni.tbl index d1c33b1d..bc2bb647 100644 --- a/src/chrtrans/iso08_uni.tbl +++ b/src/chrtrans/iso08_uni.tbl @@ -106,4 +106,5 @@ U+2122:(TM) # Let's try to show a question mark for character that cannot # be shown. 
U+fffd is used for invalid characters. -U+fffd:? +# It works, but let's stick with UHHH representatiion. - FM +#U+fffd:? diff --git a/src/chrtrans/koi8r_uni.tbl b/src/chrtrans/koi8r_uni.tbl index c4946a50..ebe4fe55 100644 --- a/src/chrtrans/koi8r_uni.tbl +++ b/src/chrtrans/koi8r_uni.tbl @@ -1,5 +1,5 @@ # Options screen name for this character set -OKOI8-R character set +OKOI8-R Cyrillic # MIME name for this charset Mkoi8-r diff --git a/src/chrtrans/mac_uni.tbl b/src/chrtrans/mac_uni.tbl new file mode 100644 index 00000000..61c630f3 --- /dev/null +++ b/src/chrtrans/mac_uni.tbl @@ -0,0 +1,342 @@ +#The MIME name of this charset. +Mmacintosh + +#Name as a Display Charset (used on Options screen) +OMacintosh (8 bit) + +# +# Name: MacOS_Roman [to Unicode] +# Unicode versions: 1.1, 2.0 +# Table version: 0.2 (from internal ufrm version <9>) +# Date: 15 April 1995 +# Author: Peter Edberg <edberg1@applelink.apple.com> +# +# Copyright (c) 1995 Apple Computer, Inc. All Rights reserved. +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple makes no warranty or representation, either express or +# implied, with respect to these tables, their quality, accuracy, or +# fitness for a particular purpose. In no event will Apple be liable +# for direct, indirect, special, incidental, or consequential damages +# resulting from any defect or inaccuracy in this document or the +# accompanying tables. +# +# These mapping tables and character lists are preliminary and +# subject to change. Updated tables will be available from the +# Unicode Inc. 
ftp site (unicode.org), the Apple Computer ftp site +# (ftp.info.apple.com), the Apple Computer World-Wide Web pages +# (http://www.info.apple.com), and possibly on diskette from APDA +# (Apple's mail-order distribution service for developers). +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the MacOS Roman code (in hex as 0xNN) +# Column #2 is the Unicode (in hex as 0xNNNN) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in MacOS Roman code order. +# +# One of these mappings requires the use of a corporate character +# (for the Apple logo character). See the file "MacOS-CorpCharList". +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Notes on MacOS Roman: +# --------------------- +# +# This character set is used for at least the following MacOS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of MacRoman are used for Croatian, Icelandic, +# Turkish, and Romanian. Separate mapping tables are available +# for these encodings. +# +# At least through System 7.5, the bitmap versions of the fonts +# Chicago, New York, Geneva, and Monaco do not implement the +# full Roman character set; they only support MacOS Roman character +# codes up to 0xD8. The TrueType versions of these fonts do +# implement the full character set, as do both the bitmap and +# TrueType versions of the other standard Roman fonts. +# +# In all MacOS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) 
have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +################## + +#0x20 U+0020 # SPACE +#0x21 U+0021 # EXCLAMATION MARK +#0x22 U+0022 # QUOTATION MARK +#0x23 U+0023 # NUMBER SIGN +#0x24 U+0024 # DOLLAR SIGN +#0x25 U+0025 # PERCENT SIGN +#0x26 U+0026 # AMPERSAND +#0x27 U+0027 # APOSTROPHE +#0x28 U+0028 # LEFT PARENTHESIS +#0x29 U+0029 # RIGHT PARENTHESIS +#0x2A U+002A # ASTERISK +#0x2B U+002B # PLUS SIGN +#0x2C U+002C # COMMA +#0x2D U+002D # HYPHEN-MINUS +#0x2E U+002E # FULL STOP +#0x2F U+002F # SOLIDUS +#0x30 U+0030 # DIGIT ZERO +#0x31 U+0031 # DIGIT ONE +#0x32 U+0032 # DIGIT TWO +#0x33 U+0033 # DIGIT THREE +#0x34 U+0034 # DIGIT FOUR +#0x35 U+0035 # DIGIT FIVE +#0x36 U+0036 # DIGIT SIX +#0x37 U+0037 # DIGIT SEVEN +#0x38 U+0038 # DIGIT EIGHT +#0x39 U+0039 # DIGIT NINE +#0x3A U+003A # COLON +#0x3B U+003B # SEMICOLON +#0x3C U+003C # LESS-THAN SIGN +#0x3D U+003D # EQUALS SIGN +#0x3E U+003E # GREATER-THAN SIGN +#0x3F U+003F # QUESTION MARK +#0x40 U+0040 # COMMERCIAL AT +#0x41 U+0041 # LATIN CAPITAL LETTER A +#0x42 U+0042 # LATIN CAPITAL LETTER B +#0x43 U+0043 # LATIN CAPITAL LETTER C +#0x44 U+0044 # LATIN CAPITAL LETTER D +#0x45 U+0045 # LATIN CAPITAL LETTER E +#0x46 U+0046 # LATIN CAPITAL LETTER F +#0x47 U+0047 # LATIN CAPITAL LETTER G +#0x48 U+0048 # LATIN CAPITAL LETTER H +#0x49 U+0049 # LATIN CAPITAL LETTER I +#0x4A U+004A # LATIN CAPITAL LETTER J +#0x4B U+004B # LATIN CAPITAL LETTER K +#0x4C U+004C # LATIN CAPITAL LETTER L +#0x4D U+004D # LATIN CAPITAL LETTER M +#0x4E U+004E # LATIN CAPITAL LETTER N +#0x4F U+004F # LATIN CAPITAL LETTER O +#0x50 U+0050 # LATIN CAPITAL LETTER P +#0x51 U+0051 # LATIN CAPITAL LETTER Q +#0x52 U+0052 # LATIN CAPITAL LETTER R +#0x53 U+0053 # LATIN CAPITAL 
LETTER S +#0x54 U+0054 # LATIN CAPITAL LETTER T +#0x55 U+0055 # LATIN CAPITAL LETTER U +#0x56 U+0056 # LATIN CAPITAL LETTER V +#0x57 U+0057 # LATIN CAPITAL LETTER W +#0x58 U+0058 # LATIN CAPITAL LETTER X +#0x59 U+0059 # LATIN CAPITAL LETTER Y +#0x5A U+005A # LATIN CAPITAL LETTER Z +#0x5B U+005B # LEFT SQUARE BRACKET +#0x5C U+005C # REVERSE SOLIDUS +#0x5D U+005D # RIGHT SQUARE BRACKET +#0x5E U+005E # CIRCUMFLEX ACCENT +#0x5F U+005F # LOW LINE +#0x60 U+0060 # GRAVE ACCENT +#0x61 U+0061 # LATIN SMALL LETTER A +#0x62 U+0062 # LATIN SMALL LETTER B +#0x63 U+0063 # LATIN SMALL LETTER C +#0x64 U+0064 # LATIN SMALL LETTER D +#0x65 U+0065 # LATIN SMALL LETTER E +#0x66 U+0066 # LATIN SMALL LETTER F +#0x67 U+0067 # LATIN SMALL LETTER G +#0x68 U+0068 # LATIN SMALL LETTER H +#0x69 U+0069 # LATIN SMALL LETTER I +#0x6A U+006A # LATIN SMALL LETTER J +#0x6B U+006B # LATIN SMALL LETTER K +#0x6C U+006C # LATIN SMALL LETTER L +#0x6D U+006D # LATIN SMALL LETTER M +#0x6E U+006E # LATIN SMALL LETTER N +#0x6F U+006F # LATIN SMALL LETTER O +#0x70 U+0070 # LATIN SMALL LETTER P +#0x71 U+0071 # LATIN SMALL LETTER Q +#0x72 U+0072 # LATIN SMALL LETTER R +#0x73 U+0073 # LATIN SMALL LETTER S +#0x74 U+0074 # LATIN SMALL LETTER T +#0x75 U+0075 # LATIN SMALL LETTER U +#0x76 U+0076 # LATIN SMALL LETTER V +#0x77 U+0077 # LATIN SMALL LETTER W +#0x78 U+0078 # LATIN SMALL LETTER X +#0x79 U+0079 # LATIN SMALL LETTER Y +#0x7A U+007A # LATIN SMALL LETTER Z +#0x7B U+007B # LEFT CURLY BRACKET +#0x7C U+007C # VERTICAL LINE +#0x7D U+007D # RIGHT CURLY BRACKET +#0x7E U+007E # TILDE +# +0x20-0x7f idem +# +0x80 U+00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 U+00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x82 U+00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 U+00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 U+00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 U+00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 U+00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 U+00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 
U+00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 U+00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A U+00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B U+00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C U+00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D U+00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E U+00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F U+00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 U+00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x91 U+00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 U+00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 U+00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 U+00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 U+00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 U+00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 U+00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 U+00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 U+00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A U+00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B U+00F5 # LATIN SMALL LETTER O WITH TILDE +0x9C U+00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D U+00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E U+00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F U+00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 U+2020 # DAGGER +0xA1 U+00B0 # DEGREE SIGN +0xA2 U+00A2 # CENT SIGN +0xA3 U+00A3 # POUND SIGN +0xA4 U+00A7 # SECTION SIGN +0xA5 U+2022 # BULLET +0xA6 U+00B6 # PILCROW SIGN +0xA7 U+00DF # LATIN SMALL LETTER SHARP S +0xA8 U+00AE # REGISTERED SIGN +0xA9 U+00A9 # COPYRIGHT SIGN +0xAA U+2122 # TRADE MARK SIGN +0xAB U+00B4 # ACUTE ACCENT +0xAC U+00A8 # DIAERESIS +0xAD U+2260 # NOT EQUAL TO +0xAE U+00C6 # LATIN CAPITAL LIGATURE AE +0xAF U+00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xB0 U+221E # INFINITY +0xB1 U+00B1 # PLUS-MINUS SIGN +0xB2 U+2264 # LESS-THAN OR EQUAL TO +0xB3 U+2265 # GREATER-THAN OR EQUAL TO +0xB4 U+00A5 # YEN SIGN +0xB5 U+00B5 # MICRO SIGN +0xB6 U+2202 # PARTIAL DIFFERENTIAL +0xB7 U+2211 # N-ARY SUMMATION +0xB8 U+220F # N-ARY PRODUCT +0xB9 U+03C0 # GREEK SMALL LETTER PI +0xBA U+222B # INTEGRAL 
+0xBB U+00AA # FEMININE ORDINAL INDICATOR +0xBC U+00BA # MASCULINE ORDINAL INDICATOR +0xBD U+2126 # OHM SIGN +0xBE U+00E6 # LATIN SMALL LIGATURE AE +0xBF U+00F8 # LATIN SMALL LETTER O WITH STROKE +0xC0 U+00BF # INVERTED QUESTION MARK +0xC1 U+00A1 # INVERTED EXCLAMATION MARK +0xC2 U+00AC # NOT SIGN +0xC3 U+221A # SQUARE ROOT +0xC4 U+0192 # LATIN SMALL LETTER F WITH HOOK +0xC5 U+2248 # ALMOST EQUAL TO +0xC6 U+2206 # INCREMENT +0xC7 U+00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC8 U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC9 U+2026 # HORIZONTAL ELLIPSIS +0xCA U+00A0 # NO-BREAK SPACE +0xCB U+00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xCC U+00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xCD U+00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xCE U+0152 # LATIN CAPITAL LIGATURE OE +0xCF U+0153 # LATIN SMALL LIGATURE OE +0xD0 U+2013 # EN DASH +0xD1 U+2014 # EM DASH +0xD2 U+201C # LEFT DOUBLE QUOTATION MARK +0xD3 U+201D # RIGHT DOUBLE QUOTATION MARK +0xD4 U+2018 # LEFT SINGLE QUOTATION MARK +0xD5 U+2019 # RIGHT SINGLE QUOTATION MARK +0xD6 U+00F7 # DIVISION SIGN +0xD7 U+25CA # LOZENGE +0xD8 U+00FF # LATIN SMALL LETTER Y WITH DIAERESIS +0xD9 U+0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xDA U+2044 # FRACTION SLASH +0xDB U+00A4 # CURRENCY SIGN +0xDC U+2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xDD U+203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xDE U+FB01 # LATIN SMALL LIGATURE FI +0xDF U+FB02 # LATIN SMALL LIGATURE FL +0xE0 U+2021 # DOUBLE DAGGER +0xE1 U+00B7 # MIDDLE DOT +0xE2 U+201A # SINGLE LOW-9 QUOTATION MARK +0xE3 U+201E # DOUBLE LOW-9 QUOTATION MARK +0xE4 U+2030 # PER MILLE SIGN +0xE5 U+00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xE6 U+00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xE7 U+00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xE8 U+00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xE9 U+00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xEA U+00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xEB U+00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xEC 
U+00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xED U+00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xEE U+00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xEF U+00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xF0 U+F8FF # Apple logo +0xF1 U+00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xF2 U+00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xF3 U+00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xF4 U+00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xF5 U+0131 # LATIN SMALL LETTER DOTLESS I +0xF6 U+02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xF7 U+02DC # SMALL TILDE +0xF8 U+00AF # MACRON +0xF9 U+02D8 # BREVE +0xFA U+02D9 # DOT ABOVE +0xFB U+02DA # RING ABOVE +0xFC U+00B8 # CEDILLA +0xFD U+02DD # DOUBLE ACUTE ACCENT +0xFE U+02DB # OGONEK +0xFF U+02C7 # CARON +# +# broken vertical bar (¦) - brvbar, brkbar +U+00A6:| +# superscript 3 (³) - sup3 +U+00B3:^3 +# superscript 2 (²) - sup2 +U+00B2:^2 +# superscript 1 (¹) - sup1 +U+00B9:^1 +# fraction 1/4 (¼) - frac14 +U+00BC: 1/4 +# fraction 1/2 (½) - frac12 +U+00BD: 1/2 +# fraction 3/4 (¾) - frac34 +U+00BE: 3/4 +# capital Eth, Icelandic (Ð) - ETH +U+00D0:DH +# Dj # capital D with stroke - Dstrok +# capital Y, acute accent (Ý) - Yacute +U+00DD:Y' +# capital THORN, Icelandic (Þ) - THORN +U+00DE:P +# multiplication sign (×) - times +U+00D7:* +# small eth, Icelandic (ð) - eth +U+00F0:dh +# small y, acute accent (ý) - yacute +U+00FD:y' +# small thorn, Icelandic (þ) - thorn +U+00FE:p +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/makefile.in b/src/chrtrans/makefile.in index 0122ecf1..cbb21618 100644 --- a/src/chrtrans/makefile.in +++ b/src/chrtrans/makefile.in @@ -9,6 +9,7 @@ SHELL = /bin/sh prefix = @prefix@ exec_prefix = @exec_prefix@ +top_srcdir = @top_srcdir@ srcdir = @srcdir@ VPATH = $(srcdir) @@ -18,7 +19,11 @@ SITE_DEFS = # FIXME: set in parent makefile CC = @CC@ CPP = @CPP@ CFLAGS = @CFLAGS@ -CPP_OPTS = @DEFS@ @CPPFLAGS@ -I.. -I../.. -I../../$(WWWINC) $(SITE_DEFS) +CPP_OPTS = @DEFS@ @CPPFLAGS@ \ + -I../.. 
\ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/$(WWWINC) \ + $(SITE_DEFS) CC_OPTS = $(CPP_OPTS) $(CFLAGS) # @@ -33,9 +38,28 @@ FONTMAP_INC = iso01_uni.h# default, if not set by recursive call CHRTR= -TABLES= $(CHRTR)iso02_uni.h \ - $(CHRTR)iso01_uni.h \ +TABLES= $(CHRTR)iso01_uni.h \ $(CHRTR)def7_uni.h \ + $(CHRTR)iso02_uni.h \ + $(CHRTR)cp1252_uni.h \ + $(CHRTR)dmcs_uni.h \ + $(CHRTR)mac_uni.h \ + $(CHRTR)next_uni.h \ + $(CHRTR)koi8r_uni.h \ + $(CHRTR)viscii_uni.h \ + $(CHRTR)cp437_uni.h \ + $(CHRTR)cp850_uni.h \ + $(CHRTR)cp852_uni.h \ + $(CHRTR)cp866_uni.h \ + $(CHRTR)cp737_uni.h \ + $(CHRTR)cp869_uni.h \ + $(CHRTR)cp864_uni.h \ + $(CHRTR)cp862_uni.h \ + $(CHRTR)cp1250_uni.h \ + $(CHRTR)cp1251_uni.h \ + $(CHRTR)cp1253_uni.h \ + $(CHRTR)cp1255_uni.h \ + $(CHRTR)cp1256_uni.h \ $(CHRTR)iso03_uni.h \ $(CHRTR)iso04_uni.h \ $(CHRTR)iso05_uni.h \ @@ -44,15 +68,6 @@ TABLES= $(CHRTR)iso02_uni.h \ $(CHRTR)iso08_uni.h \ $(CHRTR)iso09_uni.h \ $(CHRTR)iso10_uni.h \ - $(CHRTR)koi8r_uni.h \ - $(CHRTR)cp437_uni.h \ - $(CHRTR)cp850_uni.h \ - $(CHRTR)cp852_uni.h \ - $(CHRTR)cp866_uni.h \ - $(CHRTR)cp1250_uni.h \ - $(CHRTR)cp1251_uni.h \ - $(CHRTR)cp1252_uni.h \ - $(CHRTR)viscii_uni.h \ $(CHRTR)utf8_uni.h \ $(CHRTR)rfc_suni.h \ $(CHRTR)mnemonic_suni.h \ @@ -62,8 +77,10 @@ default: $(FONTMAP_INC) tables: $(TABLES) -makeuctb: makeuctb.c UCkd.h - $(CC) $(CC_OPTS) -o makeuctb makeuctb.c +makeuctb: makeuctb.o + $(CC) $(CC_OPTS) -o makeuctb makeuctb.o + +makeuctb.o: $(srcdir)/UCkd.h $(srcdir)/makeuctb.c .SUFFIXES : .tbl .i @@ -76,34 +93,44 @@ makeuctb: makeuctb.c UCkd.h @ECHO_CC@$(CPP) -C $(CPP_OPTS) $*.c >$@ .tbl.h: - ./makeuctb $*.tbl > $@ + ./makeuctb $(srcdir)/$*.tbl > $@ # table files listed here once again to get the make dependencies # right, in case makeuctb was recompiled. 
-iso01_uni.h: iso01_uni.tbl makeuctb -iso02_uni.h: iso02_uni.tbl makeuctb -def7_uni.h: def7_uni.tbl makeuctb -iso03_uni.h: iso03_uni.tbl makeuctb -iso04_uni.h: iso04_uni.tbl makeuctb -iso05_uni.h: iso05_uni.tbl makeuctb -iso06_uni.h: iso06_uni.tbl makeuctb -iso07_uni.h: iso07_uni.tbl makeuctb -iso08_uni.h: iso08_uni.tbl makeuctb -iso09_uni.h: iso09_uni.tbl makeuctb -iso10_uni.h: iso10_uni.tbl makeuctb -koi8r_uni.h: koi8r_uni.tbl makeuctb -cp437_uni.h: cp437_uni.tbl makeuctb -cp850_uni.h: cp850_uni.tbl makeuctb -cp852_uni.h: cp852_uni.tbl makeuctb -cp852_uni.h: cp866_uni.tbl makeuctb -cp1250_uni.h: cp1250_uni.tbl makeuctb -cp1251_uni.h: cp1251_uni.tbl makeuctb -cp1252_uni.h: cp1252_uni.tbl makeuctb -viscii_uni.h: viscii_uni.tbl makeuctb -utf8_uni.h: utf8_uni.tbl makeuctb -mnemonic_suni.h: mnemonic_suni.tbl makeuctb -mnem_suni.h: mnem_suni.tbl makeuctb -rfc_suni.h: rfc_suni.tbl makeuctb +def7_uni.h: $(srcdir)/def7_uni.tbl makeuctb +iso01_uni.h: $(srcdir)/iso01_uni.tbl makeuctb +iso02_uni.h: $(srcdir)/iso02_uni.tbl makeuctb +cp1252_uni.h: $(srcdir)/cp1252_uni.tbl makeuctb +dmcs_uni.h: $(srcdir)/dmcs_uni.tbl makeuctb +mac_uni.h: $(srcdir)/mac_uni.tbl makeuctb +next_uni.h: $(srcdir)/next_uni.tbl makeuctb +koi8r_uni.h: $(srcdir)/koi8r_uni.tbl makeuctb +viscii_uni.h: $(srcdir)/viscii_uni.tbl makeuctb +cp437_uni.h: $(srcdir)/cp437_uni.tbl makeuctb +cp850_uni.h: $(srcdir)/cp850_uni.tbl makeuctb +cp852_uni.h: $(srcdir)/cp852_uni.tbl makeuctb +cp866_uni.h: $(srcdir)/cp866_uni.tbl makeuctb +cp737_uni.h: $(srcdir)/cp737_uni.tbl makeuctb +cp869_uni.h: $(srcdir)/cp869_uni.tbl makeuctb +cp864_uni.h: $(srcdir)/cp864_uni.tbl makeuctb +cp862_uni.h: $(srcdir)/cp862_uni.tbl makeuctb +cp1250_uni.h: $(srcdir)/cp1250_uni.tbl makeuctb +cp1251_uni.h: $(srcdir)/cp1251_uni.tbl makeuctb +cp1253_uni.h: $(srcdir)/cp1253_uni.tbl makeuctb +cp1255_uni.h: $(srcdir)/cp1255_uni.tbl makeuctb +cp1256_uni.h: $(srcdir)/cp1256_uni.tbl makeuctb +iso03_uni.h: $(srcdir)/iso03_uni.tbl makeuctb +iso04_uni.h: 
$(srcdir)/iso04_uni.tbl makeuctb +iso05_uni.h: $(srcdir)/iso05_uni.tbl makeuctb +iso06_uni.h: $(srcdir)/iso06_uni.tbl makeuctb +iso07_uni.h: $(srcdir)/iso07_uni.tbl makeuctb +iso08_uni.h: $(srcdir)/iso08_uni.tbl makeuctb +iso09_uni.h: $(srcdir)/iso09_uni.tbl makeuctb +iso10_uni.h: $(srcdir)/iso10_uni.tbl makeuctb +utf8_uni.h: $(srcdir)/utf8_uni.tbl makeuctb +mnemonic_suni.h: $(srcdir)/mnemonic_suni.tbl makeuctb +mnem_suni.h: $(srcdir)/mnem_suni.tbl makeuctb +rfc_suni.h: $(srcdir)/rfc_suni.tbl makeuctb clean: rm -f makeuctb *.o *uni.h diff --git a/src/chrtrans/makeuctb.c b/src/chrtrans/makeuctb.c index ad95c534..f1417cb7 100644 --- a/src/chrtrans/makeuctb.c +++ b/src/chrtrans/makeuctb.c @@ -256,7 +256,8 @@ PUBLIC int main ARGS2( if ((p = strchr(buffer, '\n')) != NULL) { *p = '\0'; } else { - fprintf(stderr, "%s: Warning: line too long or incomplete\n", + fprintf(stderr, + "%s: Warning: line too long or incomplete.\n", tblname); } @@ -276,7 +277,7 @@ PUBLIC int main ARGS2( * and <unicode> ::= U+<h><h><h><h> * and <h> ::= <hexadecimal digit> * and <replace> any string not containing '\n' or '\0' - * and <C replace> any string with C backslash escapes + * and <C replace> any string with C backslash escapes. */ p = buffer; while (*p == ' ' || *p == '\t') { @@ -416,19 +417,20 @@ PUBLIC int main ARGS2( continue; } - tbuf = (char *) malloc (4*strlen(p)); + tbuf = (char *)malloc(4*strlen(p)); + if (!(p1 = tbuf)) { fprintf(stderr, "%s: Out of memory\n", tblname); exit(EX_DATAERR); } if (*p == '"') { /* - * handle "<C replace>" + * Handle "<C replace>". * Copy chars verbatim until first '"' not \-escaped or - * end of buffer + * end of buffer. 
*/ int escaped = 0; - for (ch = *++p; (ch = *p) != '\0'; p++) { + for (ch = *(++p); (ch = *p) != '\0'; p++) { if (escaped) { escaped = 0; } else if (ch == '"') { @@ -444,12 +446,17 @@ PUBLIC int main ARGS2( if (escaped) *p1++ = '\n'; } - } else { /* we had ':' */ - for (ch = *++p; (ch = *p) != '\0'; p++, p1++) { + } else { + /* + * We had ':'. + */ + for (ch = *(++p); (ch = *p) != '\0'; p++, p1++) { if ((unsigned char)ch < 32 || ch == '\\' || ch == '\"' || (unsigned char)ch >= 127) { sprintf(p1, "\\%.3o", (unsigned char)ch); -/* fprintf(stderr, "%s\n", tbuf); */ +#ifdef NOTDEFINED + fprintf(stderr, "%s\n", tbuf); +#endif /* NOTDEFINED */ p1 += 3; } else { *p1 = ch; @@ -458,16 +465,19 @@ PUBLIC int main ARGS2( } *p1 = '\0'; for (i = un0; i <= un1; i++) { -/* printf("U+0x%x:%s\n", i, tbuf); */ +#ifdef NOTDEFINED + printf("U+0x%x:%s\n", i, tbuf); */ +#endif /* NOTDEFINED */ addpair_str(tbuf,i); } continue; } -/* Input line (after skipping spaces) doesn't start with one - of the specially recognized characters, so try to interpret - it as starting with a fontpos. -*/ + /* + * Input line (after skipping spaces) doesn't start with one + * of the specially recognized characters, so try to interpret + * it as starting with a fontpos. + */ fp0 = strtol(p, &p1, 0); if (p1 == p) { fprintf(stderr, "Bad input line: %s\n", buffer); diff --git a/src/chrtrans/next_uni.tbl b/src/chrtrans/next_uni.tbl new file mode 100644 index 00000000..95dbff8b --- /dev/null +++ b/src/chrtrans/next_uni.tbl @@ -0,0 +1,182 @@ +#The MIME name of this charset. +MIMEname x-next + +#Name as a Display Charset (used on Options screen) +ONeXT character set + +# Name: NextStep Encoding to Unicode +# Unicode version: 1.1 +# Table version: 0.1 +# Table format: Format A +# Date: 14 February 1995 +# Authors: Rick McGowan (rick@unicode.org) +# +# Copyright (c) 1991-1995 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). 
+# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on magnetic media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Recipient is granted the right to make copies in any form for +# internal distribution and to freely use the information supplied +# in the creation of products supporting Unicode. Unicode, Inc. +# specifically excludes the right to re-distribute this file directly +# to third parties or other organizations whether for profit or not. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# NextStep Encoding characters map into Unicode. Since the first +# 128 characters (0x0 - 0x7f) are identical to ASCII and Unicode, +# this table only maps the NextStep range from 0x80 - 0xFF. +# +# Format: Three tab-separated columns +# Column #1 is the NextStep code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 NextStep name, Unicode name (follows a comment sign, '#') +# +# The entries are in NextStep order +# +# Any comments or problems, contact rick@unicode.org +# +# +0x20-0x7f idem +# +0x80 U+00a0 # NO-BREAK SPACE +0x81 U+00c0 # LATIN CAPITAL LETTER A WITH GRAVE +0x82 U+00c1 # LATIN CAPITAL LETTER A WITH ACUTE +0x83 U+00c2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x84 U+00c3 # LATIN CAPITAL LETTER A WITH TILDE +0x85 U+00c4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x86 U+00c5 # LATIN CAPITAL LETTER A WITH RING +0x87 U+00c7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x88 U+00c8 # LATIN CAPITAL LETTER E WITH GRAVE +0x89 U+00c9 # LATIN CAPITAL LETTER E WITH ACUTE +0x8a U+00ca # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x8b U+00cb # LATIN CAPITAL LETTER E WITH DIAERESIS +0x8c U+00cc # LATIN CAPITAL LETTER I WITH GRAVE +0x8d U+00cd # LATIN CAPITAL LETTER I 
WITH ACUTE +0x8e U+00ce # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x8f U+00cf # LATIN CAPITAL LETTER I WITH DIAERESIS +0x90 U+00d0 # LATIN CAPITAL LETTER ETH +0x91 U+00d1 # LATIN CAPITAL LETTER N WITH TILDE +0x92 U+00d2 # LATIN CAPITAL LETTER O WITH GRAVE +0x93 U+00d3 # LATIN CAPITAL LETTER O WITH ACUTE +0x94 U+00d4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0x95 U+00d5 # LATIN CAPITAL LETTER O WITH TILDE +0x96 U+00d6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x97 U+00d9 # LATIN CAPITAL LETTER U WITH GRAVE +0x98 U+00da # LATIN CAPITAL LETTER U WITH ACUTE +0x99 U+00db # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0x9a U+00dc # LATIN CAPITAL LETTER U WITH DIAERESIS +0x9b U+00dd # LATIN CAPITAL LETTER Y WITH ACUTE +0x9c U+00de # LATIN CAPITAL LETTER THORN +0x9d U+00b5 # MICRO SIGN +0x9e U+00d7 # MULTIPLICATION SIGN +0x9f U+00f7 # DIVISION SIGN +0xa0 U+00a9 # COPYRIGHT SIGN +0xa1 U+00a1 # INVERTED EXCLAMATION MARK +0xa2 U+00a2 # CENT SIGN +0xa3 U+00a3 # POUND SIGN +0xa4 U+2044 # FRACTION SLASH +0xa5 U+00a5 # YEN SIGN +0xa6 U+0192 # LATIN SMALL LETTER F WITH HOOK +0xa7 U+00a7 # SECTION SIGN +0xa8 U+00a4 # CURRENCY SIGN +0xa9 U+2019 # RIGHT SINGLE QUOTATION MARK +0xaa U+201c # LEFT DOUBLE QUOTATION MARK +0xab U+00ab # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xac U+2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xad U+203a # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xae U+fb01 # LATIN SMALL LIGATURE FI +0xaf U+fb02 # LATIN SMALL LIGATURE FL +0xb0 U+00ae # REGISTERED SIGN +0xb1 U+2013 # EN DASH +0xb2 U+2020 # DAGGER +0xb3 U+2021 # DOUBLE DAGGER +0xb4 U+00b7 # MIDDLE DOT +0xb5 U+00a6 # BROKEN BAR +0xb6 U+00b6 # PILCROW SIGN +0xb7 U+2022 # BULLET +0xb8 U+201a # SINGLE LOW-9 QUOTATION MARK +0xb9 U+201e # DOUBLE LOW-9 QUOTATION MARK +0xba U+201d # RIGHT DOUBLE QUOTATION MARK +0xbb U+00bb # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xbc U+2026 # HORIZONTAL ELLIPSIS +0xbd U+2030 # PER MILLE SIGN +0xbe U+00ac # NOT SIGN +0xbf U+00bf # INVERTED QUESTION MARK +0xc0 U+00b9 # SUPERSCRIPT ONE +0xc1 U+02cb # MODIFIER LETTER 
GRAVE ACCENT +0xc2 U+00b4 # ACUTE ACCENT +0xc3 U+02c6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xc4 U+02dc # SMALL TILDE +0xc5 U+00af # MACRON +0xc6 U+02d8 # BREVE +0xc7 U+02d9 # DOT ABOVE +0xc8 U+00a8 # DIAERESIS +0xc9 U+00b2 # SUPERSCRIPT TWO +0xca U+02da # RING ABOVE +0xcb U+00b8 # CEDILLA +0xcc U+00b3 # SUPERSCRIPT THREE +0xcd U+02dd # DOUBLE ACUTE ACCENT +0xce U+02db # OGONEK +0xcf U+02c7 # CARON +0xd0 U+2014 # EM DASH +0xd1 U+00b1 # PLUS-MINUS SIGN +0xd2 U+00bc # VULGAR FRACTION ONE QUARTER +0xd3 U+00bd # VULGAR FRACTION ONE HALF +0xd4 U+00be # VULGAR FRACTION THREE QUARTERS +0xd5 U+00e0 # LATIN SMALL LETTER A WITH GRAVE +0xd6 U+00e1 # LATIN SMALL LETTER A WITH ACUTE +0xd7 U+00e2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xd8 U+00e3 # LATIN SMALL LETTER A WITH TILDE +0xd9 U+00e4 # LATIN SMALL LETTER A WITH DIAERESIS +0xda U+00e5 # LATIN SMALL LETTER A WITH RING ABOVE +0xdb U+00e7 # LATIN SMALL LETTER C WITH CEDILLA +0xdc U+00e8 # LATIN SMALL LETTER E WITH GRAVE +0xdd U+00e9 # LATIN SMALL LETTER E WITH ACUTE +0xde U+00ea # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xdf U+00eb # LATIN SMALL LETTER E WITH DIAERESIS +0xe0 U+00ec # LATIN SMALL LETTER I WITH GRAVE +0xe1 U+00c6 # LATIN CAPITAL LETTER AE +0xe2 U+00ed # LATIN SMALL LETTER I WITH ACUTE +0xe3 U+00aa # FEMININE ORDINAL INDICATOR +0xe4 U+00ee # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xe5 U+00ef # LATIN SMALL LETTER I WITH DIAERESIS +0xe6 U+00f0 # LATIN SMALL LETTER ETH +0xe7 U+00f1 # LATIN SMALL LETTER N WITH TILDE +0xe8 U+0141 # LATIN CAPITAL LETTER L WITH STROKE +0xe9 U+00d8 # LATIN CAPITAL LETTER O WITH STROKE +0xea U+0152 # LATIN CAPITAL LIGATURE OE +0xeb U+00ba # MASCULINE ORDINAL INDICATOR +0xec U+00f2 # LATIN SMALL LETTER O WITH GRAVE +0xed U+00f3 # LATIN SMALL LETTER O WITH ACUTE +0xee U+00f4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xef U+00f5 # LATIN SMALL LETTER O WITH TILDE +0xf0 U+00f6 # LATIN SMALL LETTER O WITH DIAERESIS +0xf1 U+00e6 # LATIN SMALL LETTER AE +0xf2 U+00f9 # LATIN SMALL LETTER U WITH 
GRAVE +0xf3 U+00fa # LATIN SMALL LETTER U WITH ACUTE +0xf4 U+00fb # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xf5 U+0131 # LATIN SMALL LETTER DOTLESS I +0xf6 U+00fc # LATIN SMALL LETTER U WITH DIAERESIS +0xf7 U+00fd # LATIN SMALL LETTER Y WITH ACUTE +0xf8 U+0142 # LATIN SMALL LETTER L WITH STROKE +0xf9 U+00f8 # LATIN SMALL LETTER O WITH STROKE +0xfa U+0153 # LATIN SMALL LIGATURE OE +0xfb U+00df # LATIN SMALL LETTER SHARP S +0xfc U+00fe # LATIN SMALL LETTER THORN +0xfd U+00ff # LATIN SMALL LETTER Y WITH DIAERESIS +#0xfe U+fffd # .notdef, REPLACEMENT CHARACTER +#0xff U+fffd # .notdef, REPLACEMENT CHARACTER +# +# degree sign (°) - deg +U+00B0:DEG +# +# TRADE MARK SIGN +U+2122:(TM) diff --git a/src/chrtrans/utf8_uni.tbl b/src/chrtrans/utf8_uni.tbl index 4cc4df97..61cdb259 100644 --- a/src/chrtrans/utf8_uni.tbl +++ b/src/chrtrans/utf8_uni.tbl @@ -2,17 +2,11 @@ # This one is not really much of a "translation table", it mostly just # tells Lynx that "unicode-1-1-utf-8" is Unicode/UCS2 encoded in UTF8... # -#Shall this become the "default" translation? -#Meaning of that is currently unclear... It's different -#from the default input or defualt output charset... -#but there has to be exactly one table marked as "default". -D0 -# #The MIME name of this charset. -Municode-1-1-utf-8 +MIMEname unicode-1-1-utf-8 #Name as a Display Charset (used on Options screen) -O UNICODE UTF 8 +OptionName UNICODE UTF 8 # Some kind of raw Unicode? # Use 6 for for really "raw" 16bit UCS-2, 7 for UTF-8, ... @@ -30,4 +24,11 @@ O UNICODE UTF 8 R 7 +#Shall this become the "default" translation? +#There has to be exactly one table marked as "default". +Default NO + +# Don't fall back to default table for unicode -> 8bit +Fallback NO + 0x20-0x7f idem |