about summary refs log tree commit diff stats
path: root/WWW/Library/Implementation/SGML.c
diff options
context:
space:
mode:
Diffstat (limited to 'WWW/Library/Implementation/SGML.c')
-rw-r--r--WWW/Library/Implementation/SGML.c428
1 files changed, 182 insertions, 246 deletions
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c
index 91c1b00d..7f6324a0 100644
--- a/WWW/Library/Implementation/SGML.c
+++ b/WWW/Library/Implementation/SGML.c
@@ -4,13 +4,12 @@
 **	This module implements an HTStream object. To parse an
 **	SGML file, create this object which is a parser. The object
 **	is (currently) created by being passed a DTD structure,
-**	and a target HTStructured oject at which to throw the parsed stuff.
+**	and a target HTStructured object at which to throw the parsed stuff.
 **
-**	 6 Feb 93  Binary seraches used. Intreface modified.
+**	 6 Feb 93  Binary searches used. Interface modified.
 */
 
 #include <HTUtils.h>
-#include <tcp.h>		/* For FROMASCII */
 
 /* Remove the following to disable the experimental HTML DTD parsing.
    Currently only used in this source file. - kw */
@@ -26,24 +25,21 @@
 #include <UCDefs.h>
 #include <UCAux.h>
 
-#include <ctype.h>
-/*#include <stdio.h> included in HTUtils.h -- FM */
 #include <HTChunk.h>
 
 #include <LYCharSets.h>
+#include <LYStrings.h>
 #include <LYLeaks.h>
 
 #define INVALID (-1)
 
-#define FREE(x) if (x) {free(x); x = NULL;}
-
 PUBLIC HTCJKlang HTCJK = NOCJK; 	/* CJK enum value.		*/
 PUBLIC BOOL HTPassEightBitRaw = FALSE;	/* Pass 161-172,174-255 raw.	*/
 PUBLIC BOOL HTPassEightBitNum = FALSE;	/* Pass ^ numeric entities raw. */
 PUBLIC BOOL HTPassHighCtrlRaw = FALSE;	/* Pass 127-160,173,&#127; raw. */
 PUBLIC BOOL HTPassHighCtrlNum = FALSE;	/* Pass &#128;-&#159; raw.	*/
 
-extern int LYlowest_eightbit[];
+/*  extern int LYlowest_eightbit[];  for completeness here  */
 
 /*	The State (context) of the parser
 **
@@ -56,7 +52,7 @@ extern int LYlowest_eightbit[];
 
 /*		Element Stack
 **		-------------
-**	This allows us to return down the stack reselcting styles.
+**	This allows us to return down the stack reselecting styles.
 **	As we return, attribute values will be garbage in general.
 */
 typedef struct _HTElement HTElement;
@@ -74,7 +70,7 @@ struct _HTStream {
     CONST HTStreamClass *	isa;		/* inherited from HTStream */
 
     CONST SGML_dtd		*dtd;
-    HTStructuredClass		*actions;	/* target class  */
+    CONST HTStructuredClass	*actions;	/* target class  */
     HTStructured		*target;	/* target object */
 
     HTTag			*current_tag;
@@ -200,7 +196,7 @@ PRIVATE void set_chartrans_handling ARGS3(
 	       context->T.trans_from_uni) {
 	context->current_tag_charset = UCGetLYhndl_byMIME("utf-8");
     } else {
-	context->current_tag_charset = 0;
+	context->current_tag_charset = LATIN1;
     }
 }
 
@@ -282,17 +278,15 @@ PRIVATE void handle_attribute_name ARGS2(
 	    FREE(context->value[i]);
 #ifdef USE_COLOR_STYLE
 	    current_is_class=(!strcasecomp("class", s));
-	    if (TRACE)
-		fprintf(stderr, "SGML: found attribute %s, %d\n", s, current_is_class);
+	    CTRACE(tfp, "SGML: found attribute %s, %d\n", s, current_is_class);
 #endif
 	    return;
 	} /* if */
 
     } /* for */
 
-    if (TRACE)
-	fprintf(stderr, "SGML: Unknown attribute %s for tag %s\n",
-	    s, context->current_tag->name);
+    CTRACE(tfp, "SGML: Unknown attribute %s for tag %s\n",
+		s, context->current_tag->name);
     context->current_attribute_number = INVALID;	/* Invalid */
 }
 
@@ -310,18 +304,15 @@ PRIVATE void handle_attribute_value ARGS2(
 	if (current_is_class)
 	{
 	    strncpy (class_string, s, TEMPSTRINGSIZE);
-	    if (TRACE)
-		fprintf(stderr, "SGML: class is '%s'\n", s);
+	    CTRACE(tfp, "SGML: class is '%s'\n", s);
 	}
 	else
 	{
-	    if (TRACE)
-		fprintf(stderr, "SGML: attribute value is '%s'\n", s);
+	    CTRACE(tfp, "SGML: attribute value is '%s'\n", s);
 	}
 #endif
     } else {
-	if (TRACE)
-	    fprintf(stderr, "SGML: Attribute value %s ignored\n", s);
+	CTRACE(tfp, "SGML: Attribute value %s ignored\n", s);
     }
     context->current_attribute_number = INVALID; /* can't have two assignments! */
 }
@@ -331,9 +322,10 @@ PRIVATE void handle_attribute_value ARGS2(
 **  Translate some Unicodes to Lynx special codes and output them.
 **  Special codes - ones those output depend on parsing.
 **
-**  Additional issue, like handling bidirectional text if nesseccery
+**  Additional issue, like handling bidirectional text if necessary
 **  may be called from here:  zwnj (8204), zwj (8205), lrm (8206), rlm (8207)
-**  - currently they are passed to def7_uni.tbl as regular characters.
+**  - currently they are ignored in SGML.c and LYCharUtils.c
+**  but also in UCdomap.c because they are non printable...
 **
 */
 PRIVATE BOOL put_special_unicodes ARGS2(
@@ -361,10 +353,7 @@ PRIVATE BOOL put_special_unicodes ARGS2(
 	**  in the context of line wrapping.  Unfortunately, if we use
 	**  HT_EM_SPACE we override the chartrans tables for those spaces
 	**  (e.g., emsp= double space) with a single '32' for all (but do line
-	**  wrapping more fancy).  In the future we need HT_SPACE with a
-	**  transferred parameter (Unicode number) which falls back to
-	**  chartrans if line wrapping is not the case.
-	**
+	**  wrapping more fancy).  So we probably need HT_EN_SPACE etc...
 	*/
 	PUTC(HT_EM_SPACE);
 #ifdef NOTUSED_FOTEMODS
@@ -403,11 +392,6 @@ PRIVATE BOOL put_special_unicodes ARGS2(
 PRIVATE char replace_buf [64];	      /* buffer for replacement strings */
 PRIVATE BOOL FoundEntity = FALSE;
 
-#define IncludesLatin1Enc \
-		(context->outUCLYhndl == 0 || \
-		 (context->outUCI && \
-		  (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))
-
 PRIVATE void handle_entity ARGS2(
 	HTStream *,	context,
 	char,		term)
@@ -416,9 +400,6 @@ PRIVATE void handle_entity ARGS2(
     long uck;
     CONST char *p;
     CONST char *s = context->string->data;
-#ifdef NOTUSED_FOTEMODS
-    int high, low, i, diff;
-#endif
 
 
     /*
@@ -472,89 +453,32 @@ PRIVATE void handle_entity ARGS2(
 	    FoundEntity = TRUE;
 	    return;
 	}
-#ifdef NOTUSED_FOTEMODS
 	/*
-	**  If the value is greater than 255 and we do not
-	**  have the "7-bit approximations" as our output
-	**  character set (in which case we did it already)
-	**  seek a translation for that. - FM
+	**  Ignore zwnj (8204) and zwj (8205), if we get to here.
+	**  Note that zwnj may have been handled as <WBR>
+	**  by the calling function. - FM
 	*/
-	if ((chk = ((code > 255) &&
-		    context->outUCLYhndl !=
-				   UCGetLYhndl_byMIME("us-ascii"))) &&
-	    (uck = UCTransUniChar(code,
-				   UCGetLYhndl_byMIME("us-ascii")))>= 32 &&
-	    uck < 127) {
-	    /*
-	    **	Got an ASCII character (yippey). - FM
-	    */
-	    PUTC(((char)(uck & 0xff)));
+	if (!strcmp(s, "zwnj") ||
+	    !strcmp(s, "zwj")) {
+	    CTRACE(tfp, "handle_entity: Ignoring '%s'.\n", s);
 	    FoundEntity = TRUE;
 	    return;
-	} else if ((chk && uck == -4) &&
-		   (uck = UCTransUniCharStr(replace_buf,
-					    60, code,
-					    UCGetLYhndl_byMIME("us-ascii"),
-					    0) >= 0)) {
-	    /*
-	    **	Got a replacement string (yippey). - FM
-	    */
-	    for (p = replace_buf; *p; p++)
-		PUTC(*p);
-	    FoundEntity = TRUE;
-	    return;
-	}
-    }
-    /*
-    **	Ignore zwnj (8204) and zwj (8205), if we get to here.
-    **	Note that zwnj may have been handled as <WBR>
-    **	by the calling function. - FM
-    */
-    if (!strcmp(s, "zwnj") ||
-	!strcmp(s, "zwj")) {
-	if (TRACE) {
-	    fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s);
-	}
-	FoundEntity = TRUE;
-	return;
-    }
-
-    /*
-    **	Ignore lrm (8206), and rln (8207), if we get to here. - FM
-    */
-    if (!strcmp(s, "lrm") ||
-	!strcmp(s, "rlm")) {
-	if (TRACE) {
-	    fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s);
 	}
-	FoundEntity = TRUE;
-	return;
-    }
-
-    /*
-    **	We haven't succeeded yet, so try the old LYCharSets
-    **	arrays for translation strings. - FM
-    */
-    for (low = 0, high = context->dtd->number_of_entities;
-	 high > low;
-	 diff < 0 ? (low = i+1) : (high = i)) {  /* Binary search */
-	i = (low + (high-low)/2);
-	diff = strcmp(entities[i], s);	/* Case sensitive! */
-	if (diff == 0) {		/* success: found it */
-	    for (p = LYCharSets[context->outUCLYhndl][i]; *p; p++) {
-		PUTC(*p);
-	    }
+	/*
+	**  Ignore lrm (8206), and rln (8207), if we get to here. - FM
+	*/
+	if (!strcmp(s, "lrm") ||
+	    !strcmp(s, "rlm")) {
+	    CTRACE(tfp, "handle_entity: Ignoring '%s'.\n", s);
 	    FoundEntity = TRUE;
 	    return;
 	}
-#endif
     }
 
     /*
     **	If entity string not found, display as text.
     */
-    if (TRACE)
-	fprintf(stderr, "SGML: Unknown entity '%s'\n", s);
+    CTRACE(tfp, "SGML: Unknown entity '%s'\n", s);
     PUTC('&');
     for (p = s; *p; p++) {
 	PUTC(*p);
@@ -572,8 +496,7 @@ PRIVATE void handle_comment ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Comment:\n<%s>\n", s);
+    CTRACE(tfp, "SGML Comment:\n<%s>\n", s);
 
     if (context->csi == NULL &&
 	strncmp(s, "!--#", 4) == 0 &&
@@ -593,8 +516,7 @@ PRIVATE void handle_identifier ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Identifier\n<%s>\n", s);
+    CTRACE(tfp, "SGML Identifier\n<%s>\n", s);
 
     return;
 }
@@ -608,8 +530,7 @@ PRIVATE void handle_doctype ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Doctype\n<%s>\n", s);
+    CTRACE(tfp, "SGML Doctype\n<%s>\n", s);
 
     return;
 }
@@ -623,8 +544,7 @@ PRIVATE void handle_marked ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Marked Section:\n<%s>\n", s);
+    CTRACE(tfp, "SGML Marked Section:\n<%s>\n", s);
 
     return;
 }
@@ -638,8 +558,7 @@ PRIVATE void handle_sgmlent ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Entity Declaration:\n<%s>\n", s);
+    CTRACE(tfp, "SGML Entity Declaration:\n<%s>\n", s);
 
     return;
 }
@@ -653,8 +572,7 @@ PRIVATE void handle_sgmlele ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Element Declaration:\n<%s>\n", s);
+    CTRACE(tfp, "SGML Element Declaration:\n<%s>\n", s);
 
     return;
 }
@@ -668,8 +586,7 @@ PRIVATE void handle_sgmlatt ARGS1(
 {
     CONST char *s = context->string->data;
 
-    if (TRACE)
-	fprintf(stderr, "SGML Attribute Declaration:\n<%s>\n", s);
+    CTRACE(tfp, "SGML Attribute Declaration:\n<%s>\n", s);
 
     return;
 }
@@ -716,6 +633,7 @@ PRIVATE canclose_t can_close ARGS2(
 	return ((stacked_tag->tagclass & new_tag->canclose) ?
 		close_error : close_NO);
 }
+
 PRIVATE void do_close_stacked ARGS1(
     HTStream *, context)
 {
@@ -732,6 +650,7 @@ PRIVATE void do_close_stacked ARGS1(
     context->element_stack = stacked->next;
     FREE(stacked);
 }
+
 PRIVATE int is_on_stack ARGS2(
 	HTStream *,	context,
 	HTTag *,	old_tag)
@@ -765,8 +684,7 @@ PRIVATE void end_element ARGS2(
 	       (stackpos > 1 || (!extra_action_taken && stackpos == 0))) {
 	    canclose_check = can_close(old_tag, context->element_stack->tag);
 	    if (canclose_check != close_NO) {
-		if (TRACE)
-		    fprintf(stderr, "SGML: End </%s> \t<- %s end </%s>\n",
+		CTRACE(tfp, "SGML: End </%s> \t<- %s end </%s>\n",
 			    context->element_stack->tag->name,
 			    canclose_check == close_valid ? "supplied," : "forced by",
 			    old_tag->name);
@@ -774,8 +692,7 @@ PRIVATE void end_element ARGS2(
 		extra_action_taken = YES;
 		stackpos = is_on_stack(context, old_tag);
 	    } else {
-		if (TRACE)
-		    fprintf(stderr, "SGML: Still open %s \t<- invalid end </%s>\n",
+		CTRACE(tfp, "SGML: Still open %s \t<- invalid end </%s>\n",
 			    context->element_stack->tag->name,
 			    old_tag->name);
 		return;
@@ -783,8 +700,7 @@ PRIVATE void end_element ARGS2(
 	}
 
 	if (stackpos == 0 && old_tag->contents != SGML_EMPTY) {
-	    if (TRACE)
-		fprintf(stderr, "SGML: Still open %s, no open %s for </%s>\n",
+	    CTRACE(tfp, "SGML: Still open %s, no open %s for </%s>\n",
 			context->element_stack ?
 			context->element_stack->tag->name : "none",
 			old_tag->name,
@@ -792,8 +708,7 @@ PRIVATE void end_element ARGS2(
 	    return;
 	}
 	if (stackpos > 1) {
-	    if (TRACE)
-		fprintf(stderr, "SGML: Nesting <%s>...<%s> \t<- invalid end </%s>\n",
+	    CTRACE(tfp, "SGML: Nesting <%s>...<%s> \t<- invalid end </%s>\n",
 			old_tag->name,
 			context->element_stack->tag->name,
 			old_tag->name);
@@ -818,42 +733,36 @@ PRIVATE void end_element ARGS2(
 	    /*
 	    **	Ignore the end tag. - FM
 	    */
-	    if (TRACE) {
-		fprintf(stderr,
-			"SGML: Ignoring end tag </%s> in SELECT block.\n",
+	    CTRACE(tfp, "SGML: Ignoring end tag </%s> in SELECT block.\n",
 			old_tag->name);
-	    }
 	    return;
 	}
     }
     /*
     **	Handle the end tag. - FM
     */
-    if (TRACE)
-	fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
+    CTRACE(tfp, "SGML: End </%s>\n", old_tag->name);
     if (old_tag->contents == SGML_EMPTY) {
-	if (TRACE)
-	    fprintf(stderr, "SGML: Illegal end tag </%s> found.\n",
-			    old_tag->name);
+	CTRACE(tfp, "SGML: Illegal end tag </%s> found.\n",
+		    old_tag->name);
 	return;
     }
 #ifdef WIND_DOWN_STACK
-    while (context->element_stack) { /* Loop is error path only */
+    while (context->element_stack) /* Loop is error path only */
 #else
-    if (context->element_stack) { /* Substitute and remove one stack element */
+    if (context->element_stack) /* Substitute and remove one stack element */
 #endif /* WIND_DOWN_STACK */
+    {
 	HTElement * N = context->element_stack;
 	HTTag * t = N->tag;
 
 	if (old_tag != t) {		/* Mismatch: syntax error */
 	    if (context->element_stack->next) { /* This is not the last level */
-		if (TRACE) fprintf(stderr,
-		"SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
-		    old_tag->name, t->name, t->name);
+		CTRACE(tfp, "SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
+			    old_tag->name, t->name, t->name);
 	    } else {			/* last level */
-		if (TRACE) fprintf(stderr,
-		    "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
-		    old_tag->name, t->name, old_tag->name);
+		CTRACE(tfp, "SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
+			    old_tag->name, t->name, old_tag->name);
 		return; 		/* Ignore */
 	    }
 	}
@@ -872,9 +781,8 @@ PRIVATE void end_element ARGS2(
 	/* Syntax error path only */
 
     }
-    if (TRACE)
-	fprintf(stderr, "SGML: Extra end tag </%s> found and ignored.\n",
-			old_tag->name);
+    CTRACE(tfp, "SGML: Extra end tag </%s> found and ignored.\n",
+		old_tag->name);
 }
 
 
@@ -901,8 +809,7 @@ PRIVATE void start_element ARGS1(
 					      direct_container))) {
 	    canclose_check = can_close(new_tag, context->element_stack->tag);
 	    if (canclose_check != close_NO) {
-		if (TRACE)
-		    fprintf(stderr, "SGML: End </%s> \t<- %s start <%s>\n",
+		CTRACE(tfp, "SGML: End </%s> \t<- %s start <%s>\n",
 			    context->element_stack->tag->name,
 			    canclose_check == close_valid ? "supplied," : "forced by",
 			    new_tag->name);
@@ -911,8 +818,7 @@ PRIVATE void start_element ARGS1(
 		if (canclose_check  == close_error)
 		    direct_container = NO;
 	    } else {
-		if (TRACE)
-		    fprintf(stderr, "SGML: Still open %s \t<- invalid start <%s>\n",
+		CTRACE(tfp, "SGML: Still open %s \t<- invalid start <%s>\n",
 			    context->element_stack->tag->name,
 			    new_tag->name);
 	    }
@@ -921,8 +827,7 @@ PRIVATE void start_element ARGS1(
 	    (context->element_stack->tag->flags & Tgf_strict) &&
 	    !(valid = element_valid_within(new_tag, context->element_stack->tag,
 					   direct_container))) {
-	    if (TRACE)
-		fprintf(stderr, "SGML: Still open %s \t<- ignoring start <%s>\n",
+	    CTRACE(tfp, "SGML: Still open %s \t<- ignoring start <%s>\n",
 			context->element_stack->tag->name,
 			new_tag->name);
 	    return;
@@ -935,8 +840,7 @@ PRIVATE void start_element ARGS1(
 	    for (; i< new_tag->number_of_attributes && !has_attributes; i++)
 		has_attributes = context->present[i];
 	    if (!has_attributes) {
-		if (TRACE)
-		    fprintf(stderr, "SGML: Still open %s, converting invalid <%s> to </%s>\n",
+		CTRACE(tfp, "SGML: Still open %s, converting invalid <%s> to </%s>\n",
 			    context->element_stack->tag->name,
 			    new_tag->name,
 			    new_tag->name);
@@ -951,8 +855,7 @@ PRIVATE void start_element ARGS1(
 						   new_tag,
 						   context->element_stack->tag,
 						   direct_container))) {
-	    if (TRACE)
-		fprintf(stderr, "SGML: Still open %s \t<- invalid start <%s>\n",
+	    CTRACE(tfp, "SGML: Still open %s \t<- invalid start <%s>\n",
 			context->element_stack->tag->name,
 			new_tag->name);
 	}
@@ -998,18 +901,14 @@ PRIVATE void start_element ARGS1(
 		**  It is another form-related start tag, so terminate
 		**  the current SELECT block and fall through. - FM
 		*/
-		if (TRACE)
-		    fprintf(stderr,
-		       "SGML: Faking SELECT end tag before <%s> start tag.\n",
+		CTRACE(tfp, "SGML: Faking SELECT end tag before <%s> start tag.\n",
 			    new_tag->name);
 		end_element(context, SGMLFindTag(context->dtd, "SELECT"));
 	    } else {
 		/*
 		**  Ignore the start tag. - FM
 		*/
-		if (TRACE)
-		    fprintf(stderr,
-			  "SGML: Ignoring start tag <%s> in SELECT block.\n",
+		CTRACE(tfp, "SGML: Ignoring start tag <%s> in SELECT block.\n",
 			    new_tag->name);
 		return;
 	    }
@@ -1018,8 +917,7 @@ PRIVATE void start_element ARGS1(
     /*
     **	Handle the start tag. - FM
     */
-    if (TRACE)
-	fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
+    CTRACE(tfp, "SGML: Start <%s>\n", new_tag->name);
     (*context->actions->start_element)(
 	context->target,
 	new_tag - context->dtd->tags,
@@ -1047,7 +945,7 @@ PRIVATE void start_element ARGS1(
 **		------------------------
 **
 ** On entry,
-**	dtd	points to dtd structire including valid tag list
+**	dtd	points to dtd structure including valid tag list
 **	string	points to name of tag in question
 **
 ** On exit,
@@ -1073,7 +971,7 @@ PUBLIC HTTag * SGMLFindTag ARGS2(
 	/*
 	**  Unrecognized, but may be valid. - KW
 	*/
-	return (HTTag *)&HTTag_unrecognized;
+	return &HTTag_unrecognized;
     }
     return NULL;
 }
@@ -1199,7 +1097,7 @@ PRIVATE void SGML_character ARGS2(
     CONST char * EntityName;
     char * p;
     BOOLEAN chk;	/* Helps (?) walk through all the else ifs... */
-    UCode_t clong, uck; /* Enough bits for UCS4 ... */
+    UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */
     char c;
     char saved_char_in = '\0';
 
@@ -1308,7 +1206,7 @@ PRIVATE void SGML_character ARGS2(
     **	to Unicode, try that now. - FM
     */
     if (context->T.trans_to_uni &&
-	((unsign_c >= 127) ||
+	((unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) ||
 	 (unsign_c < 32 && unsign_c != 0 &&
 	  context->T.trans_C0_to_uni))) {
 	/*
@@ -1403,7 +1301,7 @@ top0a:
 **  which unsign_c has been defined), and from below
 **  when we are recycling a character (e.g., because
 **  it terminated an entity but is not the standard
-**  semi-colon).  The chararcter will already have
+**  semi-colon).  The character will already have
 **  been put through the Unicode conversions. - FM
 */
 top1:
@@ -1533,11 +1431,8 @@ top1:
 		   (uck = UCTransUniChar(unsign_c,
 					 context->outUCLYhndl)) >= 32 &&
 		   uck < 256) {
-	    if (TRACE) {
-		fprintf(stderr,
-			"UCTransUniChar returned 0x%.2lX:'%c'.\n",
+	    CTRACE(tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n",
 			uck, FROMASCII((char)uck));
-	    }
 	    /*
 	    **	We got one octet from the conversions, so use it. - FM
 	    */
@@ -1554,7 +1449,7 @@ top1:
 					    0) >= 0)) {
 	    /*
 	    **	Got a replacement string.
-	    **	No further tests for valididy - assume that whoever
+	    **	No further tests for validity - assume that whoever
 	    **	defined replacement strings knew what she was doing. - KW
 	    */
 	    for (p = replace_buf; *p; p++)
@@ -1565,13 +1460,19 @@ top1:
 	} else if (context->T.output_utf8 && PUTUTF8(clong)) {
 	    ; /* do nothing more */
 	/*
-	**  If it's any other (> 160) 8-bit chararcter, and
+	**  If it's any other (> 160) 8-bit character, and
 	**  we have not set HTPassEightBitRaw nor HTCJK, nor
 	**  have the "ISO Latin 1" character set selected,
 	**  back translate for our character set. - FM
 	*/
+#define IncludesLatin1Enc \
+		(context->outUCLYhndl == LATIN1 || \
+		 (context->outUCI && \
+		  (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))
+
 #define PASSHI8BIT (HTPassEightBitRaw || \
 		    (context->T.do_8bitraw && !context->T.trans_from_uni))
+
 	} else if (unsign_c > 160 && unsign_c < 256 &&
 		   !(PASSHI8BIT || HTCJK != NOCJK) &&
 		   !IncludesLatin1Enc) {
@@ -1652,14 +1553,16 @@ top1:
 		for (p = replace_buf; *p; p++)
 		    PUTC(*p);
 	    } else {
+#endif /* NOTUSED_FOTEMODS */
 		/*
 		**  Out of luck, so use the UHHH notation (ugh). - FM
 		*/
-#endif /* NOTUSED_FOTEMODS */
+			/* do not print UHHH for now
 		sprintf(replace_buf, "U%.2lX", unsign_c);
 		for (p = replace_buf; *p; p++) {
 		    PUTC(*p);
 		}
+			 */
 #ifdef NOTUSED_FOTEMODS
 	    }
 #endif /* NOTUSED_FOTEMODS */
@@ -1749,10 +1652,8 @@ top1:
 		*/
 		char temp[8];
 
-		if (TRACE) {
-		    fprintf(stderr,
-		"SGML_character: Handling 'zwnj' entity as 'WBR' element.\n");
-		}
+		CTRACE(tfp, "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n");
+
 		if (c != ';') {
 		    sprintf(temp, "<WBR>%c", c);
 		} else {
@@ -1841,7 +1742,7 @@ top1:
 	    if ((context->isHex ? sscanf(string->data, "%lx", &code) :
 				  sscanf(string->data, "%ld", &code)) == 1) {
 		if ((code == 1) ||
-		    (code > 129 && code < 156)) {
+		    (code > 127 && code < 156)) {
 		    /*
 		    **	Assume these are Microsoft code points,
 		    **	inflicted on us by FrontPage. - FM
@@ -1857,6 +1758,12 @@ top1:
 			    */
 			    code = 0x263a;
 			    break;
+			case 128:
+			    /*
+			    **	EURO currency sign
+			    */
+			    code = 0x20ac;
+			    break;
 			case 130:
 			    /*
 			    **	SINGLE LOW-9 QUOTATION MARK (sbquo)
@@ -1981,10 +1888,8 @@ top1:
 		    */
 		    char temp[8];
 
-		    if (TRACE) {
-			fprintf(stderr,
-      "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n");
-		    }
+		    CTRACE(tfp, "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n");
+
 		    /*
 		    **	Include the terminator if it is not
 		    **	the standard semi-colon. - FM
@@ -2073,6 +1978,7 @@ top1:
 		    */
 		    for (p = replace_buf; *p; p++)
 			PUTC(*p);
+#endif /* NOTUSED_FOTEMODS */
 		/*
 		**  Ignore 8205 (zwj),
 		**  8206 (lrm), and 8207 (rln), if we get to here. - FM
@@ -2085,7 +1991,7 @@ top1:
 			LYstrncpy(replace_buf,
 				  string->data,
 				  (string->size < 64 ? string->size : 63));
-			fprintf(stderr,
+			fprintf(tfp,
 				"SGML_character: Ignoring '%s%s'.\n",
 				(context->isHex ? "&#x" : "&#"),
 				replace_buf);
@@ -2096,7 +2002,6 @@ top1:
 		    if (c != ';')
 			goto top1;
 		    break;
-#endif /* NOTUSED_FOTEMODS */
 		/*
 		**  Show the numeric entity if we get to here
 		**  and the value:
@@ -2266,9 +2171,8 @@ top1:
 	    */
 	    HTTag * t;
 	    if (c == '/') {
-		if (TRACE)
-		    if (string->size!=0)
-			fprintf(stderr,"SGML: `<%s/' found!\n", string->data);
+		if (string->size != 0)
+		    CTRACE(tfp,"SGML: `<%s/' found!\n", string->data);
 		context->state = S_end;
 		break;
 	    }
@@ -2286,21 +2190,18 @@ top1:
 		for (i = 0; i < 3; i++) /* recover */
 		    PUTC(string->data[i]);
 		PUTC(c);
-		if (TRACE)
-		    fprintf(stderr, "SGML: Treating <%s%c as text\n",
+		CTRACE(tfp, "SGML: Treating <%s%c as text\n",
 			    string->data, c);
 		string->size = 0;
 		context->state = S_text;
 		break;
 	    } else if (!t) {
-		if (TRACE)
-		    fprintf(stderr, "SGML: *** Invalid element %s\n",
+		CTRACE(tfp, "SGML: *** Invalid element %s\n",
 			    string->data);
 		context->state = (c == '>') ? S_text : S_junk_tag;
 		break;
 	    } else if (t == context->unknown_tag) {
-		if (TRACE)
-		    fprintf(stderr, "SGML: *** Unknown element %s\n",
+		CTRACE(tfp, "SGML: *** Unknown element %s\n",
 			    string->data);
 		/*
 		**  Fall through and treat like valid
@@ -2655,8 +2556,7 @@ top1:
 	if (WHITE(c))
 	    break;		/* Before attribute value */
 	if (c == '>') { 	/* End of tag */
-	    if (TRACE)
-		fprintf(stderr, "SGML: found = but no value\n");
+	    CTRACE(tfp, "SGML: found = but no value\n");
 	    if (context->current_tag->name)
 		start_element(context);
 	    context->state = S_text;
@@ -2802,8 +2702,7 @@ top1:
 		t = SGMLFindTag(dtd, string->data);
 	    }
 	    if (!t || t == context->unknown_tag) {
-		if (TRACE)
-		    fprintf(stderr, "Unknown end tag </%s>\n", string->data);
+		CTRACE(tfp, "Unknown end tag </%s>\n", string->data);
 	    } else {
 		BOOL tag_OK = (c == '>' || WHITE(c));
 		context->current_tag = t;
@@ -2831,9 +2730,7 @@ top1:
 		    **	Don't treat these end tags as invalid,
 		    **	nor act on them. - FM
 		    */
-		    if (TRACE)
-			fprintf(stderr,
-				"SGML: `</%s%c' found!  Ignoring it.\n",
+		    CTRACE(tfp, "SGML: `</%s%c' found!  Ignoring it.\n",
 				string->data, c);
 		    string->size = 0;
 		    context->current_attribute_number = INVALID;
@@ -2871,27 +2768,19 @@ top1:
 			    /*
 			    **	It is not at FORM end tag, so ignore it. - FM
 			    */
-			    if (TRACE) {
-				fprintf(stderr,
-			    "SGML: Ignoring end tag </%s> in SELECT block.\n",
+			    CTRACE(tfp, "SGML: Ignoring end tag </%s> in SELECT block.\n",
 					string->data);
-			    }
 			} else {
 			    /*
 			    **	End the SELECT block and then
 			    **	handle the FORM end tag. - FM
 			    */
-			    if (TRACE) {
-				fprintf(stderr,
-			"SGML: Faking SELECT end tag before </%s> end tag.\n",
+			    CTRACE(tfp, "SGML: Faking SELECT end tag before </%s> end tag.\n",
 					string->data);
-			    }
 			    end_element(context,
 					SGMLFindTag(context->dtd, "SELECT"));
-			    if (TRACE) {
-				fprintf(stderr,
-					"SGML: End </%s>\n", string->data);
-			    }
+			    CTRACE(tfp, "SGML: End </%s>\n", string->data);
+
 			    (*context->actions->end_element)
 				(context->target,
 				 (context->current_tag - context->dtd->tags),
@@ -2902,9 +2791,7 @@ top1:
 			**  Treat a P end tag like a P start tag (Ugh,
 			**  what a hack! 8-). - FM
 			*/
-			if (TRACE)
-			    fprintf(stderr,
-				    "SGML: `</%s%c' found!  Treating as '<%s%c'.\n",
+			CTRACE(tfp, "SGML: `</%s%c' found!  Treating as '<%s%c'.\n",
 				    string->data, c, string->data, c);
 			{
 			    int i;
@@ -2917,10 +2804,8 @@ top1:
 			if (context->current_tag->name)
 			    start_element(context);
 		    } else {
-			if (TRACE) {
-			    fprintf(stderr,
-				    "SGML: End </%s>\n", string->data);
-			}
+			CTRACE(tfp, "SGML: End </%s>\n", string->data);
+
 			(*context->actions->end_element)
 			    (context->target,
 			     (context->current_tag - context->dtd->tags),
@@ -2945,8 +2830,8 @@ top1:
 	    string->size = 0;
 	    context->current_attribute_number = INVALID;
 	    if (c != '>') {
-		if (TRACE && !WHITE(c))
-		    fprintf(stderr,"SGML: `</%s%c' found!\n", string->data, c);
+		if (!WHITE(c))
+		    CTRACE(tfp,"SGML: `</%s%c' found!\n", string->data, c);
 		context->state = S_junk_tag;
 	    } else {
 		context->state = S_text;
@@ -3208,7 +3093,7 @@ PUBLIC HTStream* SGML_new  ARGS3(
     context->string = HTChunkCreate(128);	/* Grow by this much */
     context->dtd = dtd;
     context->target = target;
-    context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
+    context->actions = (CONST HTStructuredClass*)(((HTStream*)target)->isa);
 					/* Ugh: no OO */
     context->unknown_tag = &HTTag_unrecognized;
     context->state = S_text;
@@ -3265,7 +3150,7 @@ PUBLIC HTStream* SGML_new  ARGS3(
 **	Added 24-Mar-96 by FM, based on:
 **
 ////////////////////////////////////////////////////////////////////////
-Copyright (c) 1993 Electrotechnical Laboratry (ETL)
+Copyright (c) 1993 Electrotechnical Laboratory (ETL)
 
 Permission to use, copy, modify, and distribute this material
 for any purpose and without fee is hereby granted, provided
@@ -3296,19 +3181,69 @@ PUBLIC void JISx0201TO0208_EUC ARGS4(
 	register unsigned char *,	OLO)
 {
     static char *table[] = {
-	"\xA1\xA3", "\xA1\xD6", "\xA1\xD7", "\xA1\xA2", "\xA1\xA6", "\xA5\xF2",
-	"\xA5\xA1", "\xA5\xA3", "\xA5\xA5", "\xA5\xA7", "\xA5\xA9",
-	"\xA5\xE3", "\xA5\xE5", "\xA5\xE7", "\xA5\xC3", "\xA1\xBC",
-	"\xA5\xA2", "\xA5\xA4", "\xA5\xA6", "\xA5\xA8", "\xA5\xAA",
-	"\xA5\xAB", "\xA5\xAD", "\xA5\xAF", "\xA5\xB1", "\xA5\xB3",
-	"\xA5\xB5", "\xA5\xB7", "\xA5\xB9", "\xA5\xBB", "\xA5\xBD",
-	"\xA5\xBF", "\xA5\xC1", "\xA5\xC4", "\xA5\xC6", "\xA5\xC8",
-	"\xA5\xCA", "\xA5\xCB", "\xA5\xCC", "\xA5\xCD", "\xA5\xCE",
-	"\xA5\xCF", "\xA5\xD2", "\xA5\xD5", "\xA5\xD8", "\xA5\xDB",
-	"\xA5\xDE", "\xA5\xDF", "\xA5\xE0", "\xA5\xE1", "\xA5\xE2",
-	"\xA5\xE4", "\xA5\xE6", "\xA5\xE8", "\xA5\xE9", "\xA5\xEA",
-	"\xA5\xEB", "\xA5\xEC", "\xA5\xED", "\xA5\xEF", "\xA5\xF3",
-	"\xA1\xAB", "\xA1\xAC"
+	"\241\243",	/* A1,A3 */
+	"\241\326",	/* A1,D6 */
+	"\241\327",	/* A1,D7 */
+	"\241\242",	/* A1,A2 */
+	"\241\246",	/* A1,A6 */
+	"\245\362",	/* A5,F2 */
+	"\245\241",	/* A5,A1 */
+	"\245\243",	/* A5,A3 */
+	"\245\245",	/* A5,A5 */
+	"\245\247",	/* A5,A7 */
+	"\245\251",	/* A5,A9 */
+	"\245\343",	/* A5,E3 */
+	"\245\345",	/* A5,E5 */
+	"\245\347",	/* A5,E7 */
+	"\245\303",	/* A5,C3 */
+	"\241\274",	/* A1,BC */
+	"\245\242",	/* A5,A2 */
+	"\245\244",	/* A5,A4 */
+	"\245\246",	/* A5,A6 */
+	"\245\250",	/* A5,A8 */
+	"\245\252",	/* A5,AA */
+	"\245\253",	/* A5,AB */
+	"\245\255",	/* A5,AD */
+	"\245\257",	/* A5,AF */
+	"\245\261",	/* A5,B1 */
+	"\245\263",	/* A5,B3 */
+	"\245\265",	/* A5,B5 */
+	"\245\267",	/* A5,B7 */
+	"\245\271",	/* A5,B9 */
+	"\245\273",	/* A5,BB */
+	"\245\275",	/* A5,BD */
+	"\245\277",	/* A5,BF */
+	"\245\301",	/* A5,C1 */
+	"\245\304",	/* A5,C4 */
+	"\245\306",	/* A5,C6 */
+	"\245\310",	/* A5,C8 */
+	"\245\312",	/* A5,CA */
+	"\245\313",	/* A5,CB */
+	"\245\314",	/* A5,CC */
+	"\245\315",	/* A5,CD */
+	"\245\316",	/* A5,CE */
+	"\245\317",	/* A5,CF */
+	"\245\322",	/* A5,D2 */
+	"\245\325",	/* A5,D5 */
+	"\245\330",	/* A5,D8 */
+	"\245\333",	/* A5,DB */
+	"\245\336",	/* A5,DE */
+	"\245\337",	/* A5,DF */
+	"\245\340",	/* A5,E0 */
+	"\245\341",	/* A5,E1 */
+	"\245\342",	/* A5,E2 */
+	"\245\344",	/* A5,E4 */
+	"\245\346",	/* A5,E6 */
+	"\245\350",	/* A5,E8 */
+	"\245\351",	/* A5,E9 */
+	"\245\352",	/* A5,EA */
+	"\245\353",	/* A5,EB */
+	"\245\354",	/* A5,EC */
+	"\245\355",	/* A5,ED */
+	"\245\357",	/* A5,EF */
+	"\245\363",	/* A5,F3 */
+	"\241\253",	/* A1,AB */
+	"\241\254"	/* A1,AC */
     };
 
     if ((IHI == 0x8E) && (ILO >= 0xA1) && (ILO <= 0xDF)) {
@@ -3377,7 +3312,7 @@ PUBLIC void JISx0201TO0208_SJIS ARGS3(
 {
     unsigned char SJCODE[2];
 
-    JISx0201TO0208_EUC('\x8E', I, OHI, OLO);
+    JISx0201TO0208_EUC('\216', I, OHI, OLO);
     JIS_TO_SJIS1(*OHI&0x7F, *OLO&0x7F, SJCODE);
     *OHI = SJCODE[0];
     *OLO = SJCODE[1];
@@ -3487,10 +3422,11 @@ PUBLIC unsigned char * EUC_TO_JIS ARGS4(
 }
 
 PUBLIC unsigned char * TO_EUC ARGS2(
-	unsigned char *,	jis,
+	CONST unsigned char *,	jis,
 	unsigned char *,	euc)
 {
-    register unsigned char *s, *d, c, jis_stat;
+    register CONST unsigned char *s;
+    register unsigned char *d, c, jis_stat;
     register int to1B, to2B;
     register int in_sjis = 0;
 
@@ -3540,7 +3476,7 @@ PUBLIC unsigned char * TO_EUC ARGS2(
 }
 
 PUBLIC void TO_SJIS ARGS2(
-	unsigned char *,	any,
+	CONST unsigned char *,	any,
 	unsigned char *,	sjis)
 {
     unsigned char *euc;
@@ -3558,7 +3494,7 @@ PUBLIC void TO_SJIS ARGS2(
 }
 
 PUBLIC void TO_JIS ARGS2(
-	unsigned char *,	any,
+	CONST unsigned char *,	any,
 	unsigned char *,	jis)
 {
     unsigned char *euc;