about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorThomas E. Dickey <dickey@invisible-island.net>1998-03-04 19:00:00 -0500
committerThomas E. Dickey <dickey@invisible-island.net>1998-03-04 19:00:00 -0500
commite9b52cbfe84bc9e13568e784836c9e0b4b1e0913 (patch)
treec3174e1d8d535e0e82e22dfad2bb803bef288cdd /src
parent349da2fb30fd6d2be4bd47a95fee9915b50f6d67 (diff)
downloadlynx-snapshots-e9b52cbfe84bc9e13568e784836c9e0b4b1e0913.tar.gz
snapshot of project "lynx", label v2-8pre_3
Diffstat (limited to 'src')
-rw-r--r--src/HTML.c2
-rw-r--r--src/HTML.h3
-rw-r--r--src/LYCharUtils.c52
-rw-r--r--src/chrtrans/README.format9
-rw-r--r--src/chrtrans/cp1251_uni.tbl2
-rw-r--r--src/chrtrans/cp437_uni.tbl130
-rw-r--r--src/chrtrans/cp850_uni.tbl66
-rw-r--r--src/chrtrans/def7_uni.tbl94
8 files changed, 130 insertions, 228 deletions
diff --git a/src/HTML.c b/src/HTML.c
index 8d5c705b..14a11eff 100644
--- a/src/HTML.c
+++ b/src/HTML.c
@@ -6334,7 +6334,7 @@ End_Object:
 	     *	SGML unescape any character references in TEXTAREA
 	     *	content, then parse it into individual lines
 	     *	to be handled as a series of INPUT fields (ugh!).
-	     *	Any raw 8-bit or multibye characters already have been
+	     *	Any raw 8-bit or multibyte characters already have been
 	     *	handled in relation to the display character set
 	     *	in SGML_character().
 	     */
diff --git a/src/HTML.h b/src/HTML.h
index 5a642367..7cb4adfe 100644
--- a/src/HTML.h
+++ b/src/HTML.h
@@ -26,9 +26,6 @@
 #define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
 	LYUCFullyTranslateString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)
 
-#define TRANSLATE_AND_UNESCAPE_ENTITIES4(s, cs_to, p, h) \
-	LYUCFullyTranslateString(s, ATTR_CS_IN, cs_to, YES, p, h, st_HTML) /* not used */
-
 #define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
 	LYUCFullyTranslateString(s, cs_from, cs_to, YES, p, h, st_HTML)
 
diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c
index 33e775b0..7055814f 100644
--- a/src/LYCharUtils.c
+++ b/src/LYCharUtils.c
@@ -413,6 +413,11 @@ PUBLIC void LYFillLocalFileURL ARGS2(
 **  The META tag is not written if the display character set (passed as
 **  disp_chndl) already corresponds to the charset assumption that
 **  would be made when the file is read. - KW
+**
+**  Currently this function is used for temporary files like "Lynx Info Page"
+**  and for one permanent file, bookmarks (which may be a problem if you change
+**  the display charset later: new bookmark entries may be wrongly interpreted).
+** 								 - LP
 */
 PUBLIC void LYAddMETAcharsetToFD ARGS2(
 	FILE *, 	fd,
@@ -1513,16 +1518,33 @@ PRIVATE char * UCPutUtf8ToBuffer ARGS3(char *, q, UCode_t, code, BOOL, terminate
 PRIVATE char *hex = "0123456789ABCDEF";
 
 /*
-**  This function translates a string from charset
-**  `cs_from' to charset `cs_to', reallocating it if necessary.
-**  If `do_ent' is YES, it also converts HTML named entities
-**  and numeric character references (NCRs) to their `cs_to'
-**  replacements.
+ *	  Any raw 8-bit or multibyte characters already have been 
+ *	  handled in relation to the display character set        
+ *	  in SGML_character(), including named and numeric entities.
+ *
+**  This function is used for translating HTML special fields inside tags
+**  (ALT=, VALUE=, etc.) from charset `cs_from' to charset `cs_to'.
+**  It also unescapes non-ASCII characters from URLs (#fragments !)
+**  if st_URL is active.
+**
+**  If `do_ent' is YES, it converts named entities
+**  and numeric character references (NCRs) to their `cs_to' replacements.
+**
+**  Named entities are converted to Unicode values.  NCRs (Unicode values)
+**  are converted by the UCdomap.c chartrans functions.
+**  ???NCRs with values in the ISO-8859-1 range 160-255 may be converted
+**  to their HTML entity names (via old-style entities) and then translated
+**  according to the LYCharSets.c array for `cs_out'???.
+**
+**  Some characters (see descriptions in `put_special_unicodes' from SGML.c)
+**  are translated according to the state of the boolean variables
+**  `use_lynx_specials', `plain_space' and `hidden'. It is not clear yet:
+**
 **  If plain_space is TRUE, nbsp (160) will be treated as an ASCII
 **  space (32).  If hidden is TRUE, entities will be translated
 **  (if `do_ent' is YES) but escape sequences will be passed unaltered.
 **  If `hidden' is FALSE, some characters are converted to Lynx special
-**  codes (160, 173, .. @@ need list @@) (or ASCII space if `plain_space'
+**  codes (see `put_special_unicodes') or ASCII space if `plain_space'
 **  applies).  @@ is `use_lynx_specials' needed, does it have any effect? @@
 **  If `use_lynx_specials' is YES, translate byte values 160 and 173
 **  meaning U+00A0 and U+00AD given as or converted from raw char input
@@ -1536,15 +1558,6 @@ PRIVATE char *hex = "0123456789ABCDEF";
 **  If `Back' is YES, an attempt is made to use UCReverseTransChar() for
 **  back translation which may be more efficient. (?)
 **
-**  Named entities may be converted to their translations in the
-**  active LYCharSets.c array for `cs_out' or looked up as a Unicode
-**  value which is then passed to the chartrans functions (see UCdomap.c).
-**  @@ order? @@
-**  NCRs with values in the ISO-8859-1 range 160-255 may be converted
-**  to their HTML entity names and then translated according to the
-**  LYCharSets.c array for `cs_out', in general NCRs are translated
-**  by UCdomap.c chartrans functions if necessary.
-**
 **  If `stype' is st_URL, non-ASCII characters are URL-encoded instead.
 **  The sequence of bytes being URL-encoded is the raw input character if
 **  we couldn't translate it from `cs_in' (CJK etc.); otherwise it is the
@@ -1560,8 +1573,11 @@ PRIVATE char *hex = "0123456789ABCDEF";
 **  - dropped		if `stype'  is st_other, otherwise (i.e. st_HTML)
 **  - passed		if `hidden' is TRUE or HTCJK is set, otherwise
 **  - dropped.
-*/
-/*
+**
+**  (If `stype' is st_URL or st_other, most of the parameters are effectively
+**  predefined: cs_from=cs_to, use_lynx_specials=plain_space=NO, and hidden=YES)
+**
+**
 **  Returns pointer to the char** passed in
 **		 if string translated or translation unnecessary,
 **	    NULL otherwise
@@ -2204,7 +2220,7 @@ PRIVATE char ** LYUCFullyTranslateString_1 ARGS9(
 	    }
 	    /*
 	    **	Didn't find the entity.
-	    **	Return to screen verbatim.
+	    **	Return verbatim.
 	    */
 	    state = S_recover;
 	    break;
diff --git a/src/chrtrans/README.format b/src/chrtrans/README.format
index 7afc1c68..4ced0a14 100644
--- a/src/chrtrans/README.format
+++ b/src/chrtrans/README.format
@@ -120,4 +120,11 @@ Motivation:
 
 - The format is independent of details of other parts of the Lynx code,
   unlike the "old" LYCharsets.c mechanism.  The tables don't have to
-  be changed in synch when e.g. new entities are added to the HTMLDTD.
+  be changed in synch when e.g. new entities are added to entities.h.
+
+
+Note: the Default "7bit approximation" table can be used for
+case-insensitive search for non-ASCII letters if no upper/lower case
+information is provided by other means, e.g. locale.  It is assumed that
+upper/lower case letters have their "7bit approximation" images
+in def7_uni.tbl matched case-insensitively.
diff --git a/src/chrtrans/cp1251_uni.tbl b/src/chrtrans/cp1251_uni.tbl
index 21a44414..e9bb9460 100644
--- a/src/chrtrans/cp1251_uni.tbl
+++ b/src/chrtrans/cp1251_uni.tbl
@@ -21,7 +21,9 @@ OWinCyrillic (cp1251)
 #
 #    The entries are in cp1251_WinCyrillic order
 #
+#
 0x20-0x7f       idem
+#
 0x80    U+0402  #CYRILLIC CAPITAL LETTER DJE
 0x81    U+0403  #CYRILLIC CAPITAL LETTER GJE
 0x82    U+201A  #SINGLE LOW-9 QUOTATION MARK
diff --git a/src/chrtrans/cp437_uni.tbl b/src/chrtrans/cp437_uni.tbl
index ad8d9940..621e730e 100644
--- a/src/chrtrans/cp437_uni.tbl
+++ b/src/chrtrans/cp437_uni.tbl
@@ -27,134 +27,8 @@ ODosLatinUS (cp437)
 # some mapppings of greek letters to latin letters added,
 #  just for fun.. - KW
 #
-0x00	U+0000	#NULL
-0x01	U+0001	#START OF HEADING
-0x02	U+0002	#START OF TEXT
-0x03	U+0003	#END OF TEXT
-0x04	U+0004	#END OF TRANSMISSION
-0x05	U+0005	#ENQUIRY
-0x06	U+0006	#ACKNOWLEDGE
-0x07	U+0007	#BELL
-0x08	U+0008	#BACKSPACE
-0x09	U+0009	#HORIZONTAL TABULATION
-0x0a	U+000a	#LINE FEED
-0x0b	U+000b	#VERTICAL TABULATION
-0x0c	U+000c	#FORM FEED
-0x0d	U+000d	#CARRIAGE RETURN
-0x0e	U+000e	#SHIFT OUT
-0x0f	U+000f	#SHIFT IN
-0x10	U+0010	#DATA LINK ESCAPE
-0x11	U+0011	#DEVICE CONTROL ONE
-0x12	U+0012	#DEVICE CONTROL TWO
-0x13	U+0013	#DEVICE CONTROL THREE
-0x14	U+0014	U+03a0	#DEVICE CONTROL FOUR
-0x15	U+0015	#NEGATIVE ACKNOWLEDGE
-0x16	U+0016	#SYNCHRONOUS IDLE
-0x17	U+0017	#END OF TRANSMISSION BLOCK
-0x18	U+0018	#CANCEL
-0x19	U+0019	#END OF MEDIUM
-0x1a	U+001a	#SUBSTITUTE
-0x1b	U+001b	#ESCAPE
-0x1c	U+001c	#FILE SEPARATOR
-0x1d	U+001d	#GROUP SEPARATOR
-0x1e	U+001e	#RECORD SEPARATOR
-0x1f	U+001f	#UNIT SEPARATOR
-0x20	U+0020	#SPACE
-0x21	U+0021	#EXCLAMATION MARK
-0x22	U+0022	#QUOTATION MARK
-0x23	U+0023	#NUMBER SIGN
-0x24	U+0024	#DOLLAR SIGN
-0x25	U+0025	#PERCENT SIGN
-0x26	U+0026	#AMPERSAND
-0x27	U+0027	#APOSTROPHE
-0x28	U+0028	#LEFT PARENTHESIS
-0x29	U+0029	#RIGHT PARENTHESIS
-0x2a	U+002a	#ASTERISK
-0x2b	U+002b	#PLUS SIGN
-0x2c	U+002c	#COMMA
-0x2d	U+002d	#HYPHEN-MINUS
-0x2e	U+002e	#FULL STOP
-0x2f	U+002f	#SOLIDUS
-0x30	U+0030	#DIGIT ZERO
-0x31	U+0031	#DIGIT ONE
-0x32	U+0032	#DIGIT TWO
-0x33	U+0033	#DIGIT THREE
-0x34	U+0034	#DIGIT FOUR
-0x35	U+0035	#DIGIT FIVE
-0x36	U+0036	#DIGIT SIX
-0x37	U+0037	#DIGIT SEVEN
-0x38	U+0038	#DIGIT EIGHT
-0x39	U+0039	#DIGIT NINE
-0x3a	U+003a	#COLON
-0x3b	U+003b	#SEMICOLON
-0x3c	U+003c	#LESS-THAN SIGN
-0x3d	U+003d	#EQUALS SIGN
-0x3e	U+003e	#GREATER-THAN SIGN
-0x3f	U+003f	#QUESTION MARK
-0x40	U+0040	#COMMERCIAL AT
-0x41	U+0041	U+0391	#LATIN CAPITAL LETTER A
-0x42	U+0042	U+0392	#LATIN CAPITAL LETTER B
-0x43	U+0043	#LATIN CAPITAL LETTER C
-0x44	U+0044	#LATIN CAPITAL LETTER D
-0x45	U+0045	U+0395	#LATIN CAPITAL LETTER E
-0x46	U+0046	#LATIN CAPITAL LETTER F
-0x47	U+0047	#LATIN CAPITAL LETTER G
-0x48	U+0048	U+0397	#LATIN CAPITAL LETTER H
-0x49	U+0049	U+0399	#LATIN CAPITAL LETTER I
-0x4a	U+004a	#LATIN CAPITAL LETTER J
-0x4b	U+004b	U+039a	#LATIN CAPITAL LETTER K
-0x4c	U+004c	#LATIN CAPITAL LETTER L
-0x4d	U+004d	U+039c	#LATIN CAPITAL LETTER M
-0x4e	U+004e	U+039d	#LATIN CAPITAL LETTER N
-0x4f	U+004f	U+039f	#LATIN CAPITAL LETTER O
-0x50	U+0050	U+03a1	#LATIN CAPITAL LETTER P
-0x51	U+0051	#LATIN CAPITAL LETTER Q
-0x52	U+0052	#LATIN CAPITAL LETTER R
-0x53	U+0053	#LATIN CAPITAL LETTER S
-0x54	U+0054	U+03a4	#LATIN CAPITAL LETTER T
-0x55	U+0055	#LATIN CAPITAL LETTER U
-0x56	U+0056	#LATIN CAPITAL LETTER V
-0x57	U+0057	#LATIN CAPITAL LETTER W
-0x58	U+0058	U+03a7	#LATIN CAPITAL LETTER X
-0x59	U+0059	U+03a5	#LATIN CAPITAL LETTER Y
-0x5a	U+005a	U+0396	#LATIN CAPITAL LETTER Z
-0x5b	U+005b	#LEFT SQUARE BRACKET
-0x5c	U+005c	#REVERSE SOLIDUS
-0x5d	U+005d	#RIGHT SQUARE BRACKET
-0x5e	U+005e	#CIRCUMFLEX ACCENT
-0x5f	U+005f	#LOW LINE
-0x60	U+0060	#GRAVE ACCENT
-0x61	U+0061	#LATIN SMALL LETTER A
-0x62	U+0062	#LATIN SMALL LETTER B
-0x63	U+0063	#LATIN SMALL LETTER C
-0x64	U+0064	#LATIN SMALL LETTER D
-0x65	U+0065	#LATIN SMALL LETTER E
-0x66	U+0066	#LATIN SMALL LETTER F
-0x67	U+0067	U+03b3	#LATIN SMALL LETTER G
-0x68	U+0068	U+03b7	#LATIN SMALL LETTER H
-0x69	U+0069	U+03b9	#LATIN SMALL LETTER I
-0x6a	U+006a	#LATIN SMALL LETTER J
-0x6b	U+006b	U+03ba	#LATIN SMALL LETTER K
-0x6c	U+006c	U+03bb	#LATIN SMALL LETTER L
-0x6d	U+006d	#LATIN SMALL LETTER M
-0x6e	U+006e	#LATIN SMALL LETTER N
-0x6f	U+006f	U+03bf	#LATIN SMALL LETTER O
-0x70	U+0070	U+03c1	#LATIN SMALL LETTER P
-0x71	U+0071	#LATIN SMALL LETTER Q
-0x72	U+0072	#LATIN SMALL LETTER R
-0x73	U+0073	U+03c2	#LATIN SMALL LETTER S
-0x74	U+0074	#LATIN SMALL LETTER T
-0x75	U+0075	U+03c5	#LATIN SMALL LETTER U
-0x76	U+0076	U+03bd	#LATIN SMALL LETTER V
-0x77	U+0077	U+03c9	#LATIN SMALL LETTER W
-0x78	U+0078	U+03c7	#LATIN SMALL LETTER X
-0x79	U+0079	#LATIN SMALL LETTER Y
-0x7a	U+007a	U+03b6	#LATIN SMALL LETTER Z
-0x7b	U+007b	#LEFT CURLY BRACKET
-0x7c	U+007c	#VERTICAL LINE
-0x7d	U+007d	#RIGHT CURLY BRACKET
-0x7e	U+007e	#TILDE
-0x7f	U+007f	#DELETE
+0x20-0x7f	idem
+#
 0x80	U+00c7	#LATIN CAPITAL LETTER C WITH CEDILLA
 0x81	U+00fc	U+03cb	#LATIN SMALL LETTER U WITH DIAERESIS
 0x82	U+00e9	#LATIN SMALL LETTER E WITH ACUTE
diff --git a/src/chrtrans/cp850_uni.tbl b/src/chrtrans/cp850_uni.tbl
index 8a191fb7..05685971 100644
--- a/src/chrtrans/cp850_uni.tbl
+++ b/src/chrtrans/cp850_uni.tbl
@@ -4,7 +4,7 @@
 #but there has to be exactly one table marked as "default".
 D0
 #
-#The MIME name of this charset. 
+#The MIME name of this charset.
 Mcp850
 
 #Name as a Display Charset (used on Options screen)
@@ -27,38 +27,38 @@ ODosLatin1 (cp850)
 #
 #    The entries are in cp850_DOSLatin1 order
 #
-0x00	U+0000	#NULL
-0x01	U+0001	#START OF HEADING
-0x02	U+0002	#START OF TEXT
-0x03	U+0003	#END OF TEXT
-0x04	U+0004	#END OF TRANSMISSION
-0x05	U+0005	#ENQUIRY
-0x06	U+0006	#ACKNOWLEDGE
-0x07	U+0007	#BELL
-0x08	U+0008	#BACKSPACE
-0x09	U+0009	#HORIZONTAL TABULATION
-0x0a	U+000a	#LINE FEED
-0x0b	U+000b	#VERTICAL TABULATION
-0x0c	U+000c	#FORM FEED
-0x0d	U+000d	#CARRIAGE RETURN
-0x0e	U+000e	#SHIFT OUT
-0x0f	U+000f	#SHIFT IN
-0x10	U+0010	#DATA LINK ESCAPE
-0x11	U+0011	#DEVICE CONTROL ONE
-0x12	U+0012	#DEVICE CONTROL TWO
-0x13	U+0013	#DEVICE CONTROL THREE
-0x14	U+0014	#DEVICE CONTROL FOUR
-0x15	U+0015	#NEGATIVE ACKNOWLEDGE
-0x16	U+0016	#SYNCHRONOUS IDLE
-0x17	U+0017	#END OF TRANSMISSION BLOCK
-0x18	U+0018	#CANCEL
-0x19	U+0019	#END OF MEDIUM
-0x1a	U+001a	#SUBSTITUTE
-0x1b	U+001b	#ESCAPE
-0x1c	U+001c	#FILE SEPARATOR
-0x1d	U+001d	#GROUP SEPARATOR
-0x1e	U+001e	#RECORD SEPARATOR
-0x1f	U+001f	#UNIT SEPARATOR
+#0x00	U+0000	#NULL
+#0x01	U+0001	#START OF HEADING
+#0x02	U+0002	#START OF TEXT
+#0x03	U+0003	#END OF TEXT
+#0x04	U+0004	#END OF TRANSMISSION
+#0x05	U+0005	#ENQUIRY
+#0x06	U+0006	#ACKNOWLEDGE
+#0x07	U+0007	#BELL
+#0x08	U+0008	#BACKSPACE
+#0x09	U+0009	#HORIZONTAL TABULATION
+#0x0a	U+000a	#LINE FEED
+#0x0b	U+000b	#VERTICAL TABULATION
+#0x0c	U+000c	#FORM FEED
+#0x0d	U+000d	#CARRIAGE RETURN
+#0x0e	U+000e	#SHIFT OUT
+#0x0f	U+000f	#SHIFT IN
+#0x10	U+0010	#DATA LINK ESCAPE
+#0x11	U+0011	#DEVICE CONTROL ONE
+#0x12	U+0012	#DEVICE CONTROL TWO
+#0x13	U+0013	#DEVICE CONTROL THREE
+#0x14	U+0014	#DEVICE CONTROL FOUR
+#0x15	U+0015	#NEGATIVE ACKNOWLEDGE
+#0x16	U+0016	#SYNCHRONOUS IDLE
+#0x17	U+0017	#END OF TRANSMISSION BLOCK
+#0x18	U+0018	#CANCEL
+#0x19	U+0019	#END OF MEDIUM
+#0x1a	U+001a	#SUBSTITUTE
+#0x1b	U+001b	#ESCAPE
+#0x1c	U+001c	#FILE SEPARATOR
+#0x1d	U+001d	#GROUP SEPARATOR
+#0x1e	U+001e	#RECORD SEPARATOR
+#0x1f	U+001f	#UNIT SEPARATOR
 0x20	U+0020	#SPACE
 0x21	U+0021	#EXCLAMATION MARK
 0x22	U+0022	#QUOTATION MARK
diff --git a/src/chrtrans/def7_uni.tbl b/src/chrtrans/def7_uni.tbl
index 0c86d234..66a63f76 100644
--- a/src/chrtrans/def7_uni.tbl
+++ b/src/chrtrans/def7_uni.tbl
@@ -92,6 +92,7 @@ U+00fc:u:
 0x79	U+00fd
 U+00fe:th
 0x79	U+00ff
+# end of latin-1 repertoire
 0x41	U+0100	U+0102	U+0104			# A
 0x61	U+0101	U+0103	U+0105			# a
 0x43	U+0106	U+0108	U+010a	U+010c		# C
@@ -243,7 +244,8 @@ U+0217:u)
 #   Linkname: FAQ: Representing IPA Phonetics in ASCII
 #        URL: http://www.hpl.hp.com/personal/Evan_Kirshenbaum/IPA/faq.html
 #        (corrected in Russian Cyrillic area).
-# 
+#        (corrected in Greek area).
+#
 0x41	U+0251 #	LATIN SMALL LETTER SCRIPT A	-> A
 U+0252:A.
 U+0253:b`
@@ -344,6 +346,7 @@ U+037a:j3
 U+037e:?%
 U+0384:'*
 U+0385:'%
+# Greek letters
 U+0386:A%
 U+0387:.*
 U+0388:E%
@@ -355,69 +358,70 @@ U+038f:W%
 U+0390:i3
 U+0391:A
 U+0392:B
-U+0393:G*
-U+0394:D*
+U+0393:G
+U+0394:D
 U+0395:E
 U+0396:Z
-U+0397:Y*
-U+0398:H*
+U+0397:Y
+U+0398:TH
 U+0399:I
 U+039a:K
-U+039b:L*
+U+039b:L
 U+039c:M
 U+039d:N
-U+039e:C*
+U+039e:C
 U+039f:O
-U+03a0:P*
-U+03a1:R*
-U+03a3:S*
+U+03a0:P
+U+03a1:R
+U+03a3:S
 U+03a4:T
-U+03a5:U*
-U+03a6:F*
-U+03a7:X*
-U+03a8:Q*
-U+03a9:W*
-U+03aa:J*
+U+03a5:U
+U+03a6:F
+U+03a7:X
+U+03a8:Q
+U+03a9:W
+U+03aa:J
 U+03ab:V*
 U+03ac:a%
 U+03ad:e%
 U+03ae:y%
 U+03af:i%
 U+03b0:u3
-U+03b1:a*
-U+03b2:b*
-U+03b3:g*
-U+03b4:d*
-U+03b5:e*
-U+03b6:z*
-U+03b7:y*
-U+03b8:h*
-U+03b9:i*
-U+03ba:k*
-U+03bb:l*
-U+03bc:m*
-U+03bd:n*
-U+03be:c*
+U+03b1:a
+U+03b2:b
+U+03b3:g
+U+03b4:d
+U+03b5:e
+U+03b6:z
+U+03b7:y
+U+03b8:th
+U+03b9:i
+U+03ba:k
+U+03bb:l
+U+03bc:m
+U+03bd:n
+U+03be:c
 U+03bf:o
-U+03c0:p*
-U+03c1:r*
+U+03c0:p
+U+03c1:r
 U+03c2:*s
-U+03c3:s*
-U+03c4:t*
-U+03c5:u*
-U+03c6:f*
-U+03c7:x*
-U+03c8:q*
-U+03c9:w*
-U+03ca:j*
+U+03c3:s
+U+03c4:t
+U+03c5:u
+U+03c6:f
+U+03c7:x
+U+03c8:q
+U+03c9:w
+U+03ca:j
 U+03cb:v*
 U+03cc:o%
 U+03cd:u%
 U+03ce:w%
-U+03d0:b3
+# Greek symbols
+U+03d0:beta 
 U+03d1:theta 
-U+03d2:upsi
-U+03d5:phi
+U+03d2:upsi 
+U+03d5:phi 
 U+03d6:pi 
 U+03da:T3
 U+03db:t3
@@ -427,7 +431,7 @@ U+03de:K3
 U+03df:k3
 U+03e0:P3
 U+03e1:p3
-U+03f0:kappa
+U+03f0:kappa 
 U+03f1:rho 
 U+03f4:'%
 U+03f5:j3
@@ -1276,6 +1280,7 @@ U+1fdf:?;
 U+1fed:!:
 U+1fef:!*
 U+1ffe:;;
+# General punctuation:
 0x20	U+2000 U+2002	U+2004-U+2009	# spaces
 U+2001:  
 U+2003:  
@@ -1315,6 +1320,7 @@ U+203c:!!
 U+203e:'-
 0x2d   U+2043  # HYPHEN BULLET ?
 U+2044:/
+# end of General punctuation.
 U+2070:^0
 U+2074:^4
 U+2075:^5