snapshot of project "lynx", label v2-8-1dev_13

author: Thomas E. Dickey <dickey@invisible-island.net> 1998-05-24 00:20:00 -0400
committer: Thomas E. Dickey <dickey@invisible-island.net> 1998-05-24 00:20:00 -0400
commit: 6e75abc094af51d02563fd5250d1b55e9bb96912 (patch)
tree: 467ddc6c66cc643b960c48f70c93907c1bc775aa /WWW
parent: becd7d203aa4cc62c6e4113a496c88f6eab9edaf (diff)
download: lynx-snapshots-6e75abc094af51d02563fd5250d1b55e9bb96912.tar.gz
3 files changed, 122 insertions, 118 deletions
diff --git a/WWW/Library/Implementation/HTAAProt.c b/WWW/Library/Implementation/HTAAProt.c
index 12c6361a..0c8dac86 100644
--- a/WWW/Library/Implementation/HTAAProt.c
+++ b/WWW/Library/Implementation/HTAAProt.c
@@ -179,7 +179,7 @@ PUBLIC int HTAA_getGid NOARGS
 		CTRACE(tfp, "%s(%s) returned (%s:%s:%d:...)\n",
 			    "HTAA_getGid: getgrgid",
 			    current_prot->gid_name,
-			    gr->gr_name, (int) gr->gr_passwd, (int) gr->gr_gid);
+			    gr->gr_name, gr->gr_passwd, (int) gr->gr_gid);
 #endif
 		return gr->gr_gid;
 	    }
@@ -190,7 +190,7 @@ PUBLIC int HTAA_getGid NOARGS
 		CTRACE(tfp, "%s(\"%s\") returned (%s:%s:%d:...)\n",
 			    "HTAA_getGid: getgrnam",
 			    current_prot->gid_name,
-			    gr->gr_name, (int) gr->gr_passwd, (int) gr->gr_gid);
+			    gr->gr_name, gr->gr_passwd, (int) gr->gr_gid);
 #endif
 		return gr->gr_gid;
 	    }
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c
index df6cba9a..5caa4550 100644
--- a/WWW/Library/Implementation/HTPlain.c
+++ b/WWW/Library/Implementation/HTPlain.c
@@ -33,7 +33,6 @@
 
 #define FREE(x) if (x) {free(x); x = NULL;}
 
-extern BOOLEAN LYRawMode;
 extern BOOL HTPassEightBitRaw;
 extern BOOL HTPassHighCtrlRaw;
 extern HTCJKlang HTCJK;
@@ -54,6 +53,7 @@ struct _HTStream {
     /*
     **	The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM
     */
+    LYUCcharset	*		outUCI;
     int outUCLYhndl;
     /*
     **	Counter, value, buffer and pointer for UTF-8 handling. - FM
@@ -91,6 +91,7 @@ PRIVATE void HTPlain_getChartransInfo ARGS2(
 	me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
     }
     me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
+    me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
 }
 
 /*	Write the buffer out to the socket
@@ -136,14 +137,14 @@ PRIVATE void HTPlain_put_character ARGS2(
     HTPlain_lastraw = c;
     if (c == '\r') {
 	HText_appendCharacter(me->text, '\n');
-    } else if (HTCJK != NOCJK) {
-	HText_appendCharacter(me->text, c);
     } else if ((unsigned char)c >= 127) {
 	/*
 	**  For now, don't repeat everything here
 	**  that has been done below - KW
 	*/
 	HTPlain_write(me, &c, 1);
+    } else if (HTCJK != NOCJK) {
+	HText_appendCharacter(me->text, c);
     } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 &&
 	       HTPassHighCtrlRaw) {
 	HText_appendCharacter(me->text, c);
@@ -156,9 +157,9 @@ PRIVATE void HTPlain_put_character ARGS2(
 	HText_appendCharacter(me->text, c);
     } else if ((unsigned char)c > 160) {
 	if (!HTPassEightBitRaw &&
-	    current_char_set != 0) {
-	    size_t len, high, low, i;
-	    int diff = 1;
+	    !((me->outUCLYhndl == 0) ||
+	      (me->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) {
+	    int len, high, low, i, diff = 1;
 	    CONST char * name;
 	    UCode_t value = (UCode_t)((unsigned char)c - 160);
 
@@ -172,7 +173,7 @@ PRIVATE void HTPlain_put_character ARGS2(
 		diff = strncmp(HTML_dtd.entity_names[i], name, len);
 		if (diff == 0) {
 		    HText_appendText(me->text,
-				     LYCharSets[current_char_set][i]);
+				     LYCharSets[me->outUCLYhndl][i]);
 		    break;
 		}
 	    }
@@ -218,8 +219,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
     char c;
     unsigned char c_unsign;
     BOOL chk;
-    UCode_t code;
-    long uck = 0;
+    UCode_t code, uck;
+    char saved_char_in = '\0';
 
     for (p = s; p < e; p++) {
 #ifdef REMOVE_CR_ONLY
@@ -252,6 +253,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	c = *p;
 	c_unsign = (unsigned char)c;
 	code = (UCode_t)c_unsign;
+	saved_char_in = '\0';
 	/*
 	**  Combine any UTF-8 multibytes into Unicode
 	**  to check for special characters. - FM
@@ -282,8 +284,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 			*/
 			*(me->utf_buf_p) = '\0';
 			code = me->utf_char;
-			if (code < 256) {
+			if (code > 0 && code < 256) {
 			    c = FROMASCII((char)code);
+			    c_unsign = (unsigned char)c;
 			}
 		    } else {
 			/*
@@ -295,9 +298,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    /*
 		    **	Start handling a new multibyte character. - FM
 		    */
-		    me->utf_buf_p = me->utf_buf;
 		    me->utf_buf_p[0] = c;
-		    (me->utf_buf_p)++;
+		    me->utf_buf_p = &me->utf_buf[1];
 		    if ((*p & 0xe0) == 0xc0) {
 			me->utf_count = 1;
 			me->utf_char = (c & 0x1f);
@@ -318,24 +320,43 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 			 *  We got garbage, so ignore it. - FM
 			 */
 			me->utf_count = 0;
-			me->utf_buf_p = me->utf_buf;
 			me->utf_buf_p[0] = '\0';
+			me->utf_buf_p = me->utf_buf;
 		    }
 		    /*
 		    **	Get the next byte. - FM
 		    */
 		    continue;
 		}
-	    } else {
+	    } else if (me->utf_count > 0) {
 		/*
-		**  Got an ASCII character.
+		**  Got an ASCII character when expecting
+		**  UTF-8 multibytes, so ignore the buffered
+		**  multibyte characters and fall through with
+		**  the current ASCII character. - FM
 		*/
 		me->utf_count = 0;
 		me->utf_buf[0] = '\0';
 		me->utf_buf_p = me->utf_buf;
+		code = (UCode_t)c_unsign;
+	    } else {
+		/*
+		**  Got a valid ASCII character, so fall
+		**  through with it. - FM
+		*/
+		code = (UCode_t)c_unsign;
 	    }
 	}
-
+	/*
+	**  Convert characters from non-UTF-8 charsets
+	**  to Unicode (if appropriate). - FM
+	*/
+	if (!(me->T.decode_utf8 &&
+	      (unsigned char)(*p) > 127)) {
+#ifdef NOTDEFINED
+	    if (me->T.strip_raw_char_in)
+		saved_char_in = c;
+#endif /* NOTDEFINED */
 	if (me->T.trans_to_uni &&
 	    (code >= LYlowest_eightbit[me->inUCLYhndl] ||
 	     (code < 32 && code != 0 &&
@@ -345,10 +366,58 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		*/
 	    code = (UCode_t)UCTransToUni(c, me->inUCLYhndl);
 	    if (code > 0) {
+		    saved_char_in = c;
 		if (code < 256) {
-		    c = FROMASCII((char)code);
+			c = FROMASCII((char)code);
+			c_unsign = (unsigned char)c;
 		}
 	    }
+	    } else if (code < 32 && code != 0 &&
+		       me->T.trans_C0_to_uni) {
+		/*
+		**  Quote from SGML.c:
+		**  	"This else if may be too ugly to keep. - KW"
+		*/
+		if (me->T.trans_from_uni &&
+		    (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) ||
+		     (me->T.transp &&
+		      (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) {
+		    saved_char_in = c;
+		    if (code < 256) {
+			c = FROMASCII((char)code);
+			c_unsign = (unsigned char)c;
+		    }
+		} else {
+		    uck = -1;
+		    if (me->T.transp) {
+			uck = UCTransCharStr(replace_buf, 60, c,
+					     me->inUCLYhndl,
+					     me->inUCLYhndl, NO);
+		    }
+		    if (!me->T.transp || uck < 0) {
+			uck = UCTransCharStr(replace_buf, 60, c,
+					     me->inUCLYhndl,
+					     me->outUCLYhndl, YES);
+		    }
+		    if (uck == 0) {
+			continue;
+		    } else if (uck < 0) {
+			me->utf_buf[0] = '\0';
+			code = (unsigned char)c;
+		    } else {
+			c = replace_buf[0];
+			if (c && replace_buf[1]) {
+			    HText_appendText(me->text, replace_buf);
+			    continue;
+			}
+		    }
+		    me->utf_buf[0] = '\0';
+		    code = (unsigned char)c;
+		} /*  Next line end of ugly stuff for C0. - KW */
+	    } else {
+		me->utf_buf[0] = '\0';
+		code = (unsigned char)c;
+	    }
 	}
 	/*
 	**  At this point we have either code in Unicode
@@ -399,17 +468,16 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    c >= LYlowest_eightbit[me->outUCLYhndl]) ||
 		   *p == '\n' || *p == '\t') {
 	    HText_appendCharacter(me->text, c);
-
-	} else if (me->T.use_raw_char_in) {
-	    HText_appendCharacter(me->text, *p);
-#ifdef NOTDEFINED
 	/*
 	**  Use an ASCII space (32) for ensp, emsp or thinsp. - FM
 	*/
 	} else if (code == 8194 || code == 8195 || code == 8201) {
 	    HText_appendCharacter(me->text, ' ');
-#endif /* NOTDEFINED */
-
+	/*
+	**  If we want the raw character, pass it now. - FM
+	*/
+	} else if (me->T.use_raw_char_in && saved_char_in) {
+	    HText_appendCharacter(me->text, saved_char_in);
 /******************************************************************
  *   I. LATIN-1 OR UCS2  TO  DISPLAY CHARSET
  ******************************************************************/
@@ -460,23 +528,29 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	    **	(somewhat) readable ASCII.
 	    */
 	    HText_appendCharacter(me->text, (char)(*p & 0x7f));
+#endif /* NOTDEFINED */
+	   /*
+	   **  If we don't actually want the character,
+	   **  make it safe and output that now. - FM
+	   */
+	   } else if ((c_unsign > 0 &&
+		      c_unsign < LYlowest_eightbit[me->outUCLYhndl]) ||
+		      (me->T.trans_from_uni && !HTPassEightBitRaw)) {
 	    /*
 	    **	If we do not have the "7-bit approximations" as our
 	    **	output character set (in which case we did it already)
 	    **	seek a translation for that.  Otherwise, or if the
 	    **	translation fails, use UHHH notation. - FM
 	    */
-	} else if (chk &&
-		   (chk = (!HTPassEightBitRaw &&
-			   (me->outUCLYhndl !=
-			    UCGetLYhndl_byMIME("us-ascii")))) &&
+	    if ((chk = (me->outUCLYhndl !=
+			UCGetLYhndl_byMIME("us-ascii"))) &&
 		   (uck = UCTransUniChar(code,
 					 UCGetLYhndl_byMIME("us-ascii")))
 				      >= 32 && uck < 127) {
 		/*
 		**  Got an ASCII character (yippey). - FM
 		*/
-	    c = ((char)(uck & 0xff));
+	    c = FROMASCII((char)uck);
 	    HText_appendCharacter(me->text, c);
 	} else if ((chk && uck == -4) &&
 		       (uck = UCTransUniCharStr(replace_buf,
@@ -486,104 +560,33 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		/*
 		**  Got a repacement string (yippey). - FM
 		*/
-	    HText_appendText(me->text, replace_buf);
-	} else if (code == 8204 || code == 8205) {
-	    /*
-	    **	Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM
-	    */
-	    CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code);
-	} else if (code == 8206 || code == 8207) {
-	    /*
-	    **	Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM
-	    */
-	    CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code);
-#endif /* NOTDEFINED */
-	} else if (me->T.trans_from_uni && code > 255) {
-	    if (PASSHI8BIT && PASSHICTRL && LYRawMode &&
-		(unsigned char)*p >= LYlowest_eightbit[me->outUCLYhndl]) {
-		HText_appendCharacter(me->text, *p);
-	    } else {
-		sprintf(replace_buf, "U%.2lX", code);
 		HText_appendText(me->text, replace_buf);
-	    }
-	/*
-	**  If we get to here and HTPassEightBitRaw or the
-	**  selected character set is not "ISO Latin 1",
-	**  use the translation tables for 161-255 8-bit
-	**  characters (173 was handled above). - FM
-	*/
-	} else if (code > 160) {
-	    if (!HTPassEightBitRaw && code <= 255 &&
-		me->outUCLYhndl != 0) {
+	    } else if (code == 8204 || code == 8205) {
+		/*
+		**	Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM
+		*/
+		CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code);
+	    } else if (code == 8206 || code == 8207) {
+		/*
+		**	Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM
+		*/
+		CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code);
+	    } else {
 		/*
 		**  Out of luck, so use the UHHH notation (ugh). - FM
 		*/
-		size_t len, high, low, i;
-		int diff = 1;
-		CONST char * name;
-		int value = (int)(code - 160);
-
-		name = HTMLGetEntityName(value);
-		len =  strlen(name);
-		for(low = 0, high = HTML_dtd.number_of_entities;
-		    high > low;
-		    diff < 0 ? (low = i+1) : (high = i)) {
-		    /* Binary search */
-		    i = (low + (high-low)/2);
-		    diff = strncmp(HTML_dtd.entity_names[i], name, len);
-		    if (diff == 0) {
-			HText_appendText(me->text,
-					 LYCharSets[me->outUCLYhndl][i]);
-			break;
-		    }
-		}
-		if (diff) {
-		    /*
-		    **	Something went wrong in the translation, so
-		    **	either output as UTF8 or a hex representation or
-		    **	pass the raw character and hope it's OK.
-		    */
-		    if (!PASSHI8BIT)
-			c = FROMASCII((char)code);
-		    if (me->T.output_utf8 &&
-			*me->utf_buf) {
-			HText_appendText(me->text, me->utf_buf);
-			me->utf_buf_p = me->utf_buf;
-			*(me->utf_buf_p) = '\0';
-
-		    } else if (me->T.trans_from_uni) {
+			/* do not print UHHH for now
 			sprintf(replace_buf, "U%.2lX", code);
 			HText_appendText(me->text, replace_buf);
-		    } else
-			HText_appendCharacter(me->text, c);
+			*/
 		}
-	    } else {
 		/*
-		**  Didn't attempt a translation. - FM
+		**  If we get to here and have a monobyte character,
+		**  pass it. - FM
 		*/
-		/*  Either output as UTF8 or a hex representation or
-		**  pass the raw character and hope it's OK.
-		*/
-		if (code <= 255 && !PASSHI8BIT)
-		    c = FROMASCII((char)code);
-		if (code > 127 && me->T.output_utf8 && *me->utf_buf) {
-		    HText_appendText(me->text, me->utf_buf);
-		    me->utf_buf_p = me->utf_buf;
-		    *(me->utf_buf_p) = '\0';
-
-		} else if (LYRawMode &&
-			   me->inUCLYhndl != me->outUCLYhndl &&
-			   (PASSHI8BIT || PASSHICTRL) &&
-			   (unsigned char)c >=
-				     LYlowest_eightbit[me->outUCLYhndl]) {
-		    HText_appendCharacter(me->text, c);
-		} else if (me->T.trans_from_uni && code >= 127) {
-		    sprintf(replace_buf, "U%.2lX", code);
-		    HText_appendText(me->text, replace_buf);
-		} else
+	} else if (c_unsign > 0 && c_unsign < 256) {
 		HText_appendCharacter(me->text, c);
 	    }
-	}
 #endif /* REMOVE_CR_ONLY */
     }
 }
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c
index 32243519..d2a84397 100644
--- a/WWW/Library/Implementation/SGML.c
+++ b/WWW/Library/Implementation/SGML.c
@@ -329,7 +329,8 @@ PRIVATE void handle_attribute_value ARGS2(
 **
 **  Additional issue, like handling bidirectional text if necessary
 **  may be called from here:  zwnj (8204), zwj (8205), lrm (8206), rlm (8207)
-**  - currently they are passed to def7_uni.tbl as regular characters.
+**  - currently they are ignored in SGML.c and LYCharUtils.c
+**  but also in UCdomap.c because they are non printable...
 **
 */
 PRIVATE BOOL put_special_unicodes ARGS2(
author	Thomas E. Dickey <dickey@invisible-island.net>	1998-05-24 00:20:00 -0400
committer	Thomas E. Dickey <dickey@invisible-island.net>	1998-05-24 00:20:00 -0400
commit	6e75abc094af51d02563fd5250d1b55e9bb96912 (patch)
tree	467ddc6c66cc643b960c48f70c93907c1bc775aa /WWW
parent	becd7d203aa4cc62c6e4113a496c88f6eab9edaf (diff)
download	lynx-snapshots-6e75abc094af51d02563fd5250d1b55e9bb96912.tar.gz