snapshot of project "lynx", label v2-7-1ac_0-76

author: Thomas E. Dickey <dickey@invisible-island.net> 1997-10-06 04:08:00 -0400
committer: Thomas E. Dickey <dickey@invisible-island.net> 1997-10-06 04:08:00 -0400
commit: 1d80538b4b84eadd223c7b61839b950389c2d49d (patch)
tree: a46f327e82edb06d8d789b60c3395f873476e040 /WWW/Library
parent: 443226a5ffcf805f6ab3ccbcc2a6b4802793b07d (diff)
download: lynx-snapshots-1d80538b4b84eadd223c7b61839b950389c2d49d.tar.gz
33 files changed, 918 insertions, 439 deletions
diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c
index fc46ac4a..64a7e0ba 100644
--- a/WWW/Library/Implementation/HTChunk.c
+++ b/WWW/Library/Implementation/HTChunk.c
@@ -79,6 +79,74 @@ PUBLIC void HTChunkEnsure ARGS2 (HTChunk *,ch, int,needed)
         outofmem(__FILE__, "HTChunkEnsure");
 }
 
+#ifdef EXP_CHARTRANS
+
+#define PUTC(code) ch->data[ch->size++] = (char)(code)
+#define PUTC2(code) ch->data[ch->size++] = (char)(0x80|(0x3f &(code)))
+
+PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code)
+{
+    int utflen;
+    if (code < 128)
+	utflen = 1;
+    else if   (code <     0x800L) {
+	utflen = 2;
+    } else if (code <   0x10000L) {
+	utflen = 3;
+    } else if (code <  0x200000L) {
+	utflen = 4;
+    } else if (code < 0x4000000L) {
+	utflen = 5;
+    } else if (code<=0x7fffffffL) {
+	utflen = 6;
+    } else
+	utflen = 0;
+
+    if (ch->size + utflen > ch->allocated) {
+	int growby = (ch->growby >= utflen) ? ch->growby : utflen;
+	ch->allocated = ch->allocated + growby;
+        ch->data = ch->data ? (char *)realloc(ch->data, ch->allocated)
+			    : (char *)calloc(1, ch->allocated);
+      if (!ch->data)
+          outofmem(__FILE__, "HTChunkPutUtf8Char");
+    }
+
+    switch(utflen) {
+    case 0:
+	return;
+    case 1:
+	ch->data[ch->size++] = (char)code;
+	return;
+    case 2:
+	PUTC(0xc0 | (code>>6));
+	break;
+    case 3:
+	PUTC(0xe0 | (code>>12));
+	break;
+    case 4:
+	PUTC(0xf0 | (code>>18));
+	break;
+    case 5:
+	PUTC(0xf8 | (code>>24));
+	break;
+    case 6:
+	PUTC(0xfc | (code>>30));
+    }
+    switch(utflen) {
+    case 6:
+	PUTC2(code>>24);
+    case 5:
+	PUTC2(code>>18);
+    case 4:
+	PUTC2(code>>12);
+    case 3:
+	PUTC2(code>>6);
+    case 2:
+	PUTC2(code);
+    }
+}
+
+#endif /* EXP_CHARTRANS */
 
 /*	Terminate a chunk
 **	-----------------
diff --git a/WWW/Library/Implementation/HTChunk.h b/WWW/Library/Implementation/HTChunk.h
index 260f798a..c7308165 100644
--- a/WWW/Library/Implementation/HTChunk.h
+++ b/WWW/Library/Implementation/HTChunk.h
@@ -7,6 +7,10 @@
    automatically reallocating them as necessary.
    
  */
+#ifdef EXP_CHARTRANS
+#include "UCMap.h"
+#endif
+
 typedef struct {
         int     size;           /* In bytes                     */
         int     growby;         /* Allocation unit in bytes     */
@@ -114,8 +118,12 @@ Append a character to a  chunk
  */
 extern void HTChunkPutc PARAMS((HTChunk * ch, char c));
 
-/*
+#ifdef EXP_CHARTRANS
+extern void HTChunkPutUtf8Char PARAMS((HTChunk * ch, UCode_t code));
 
+#endif /* EXP_CHARTRANS */
+
+/*
 Append a string to a  chunk
 
   ON ENTRY,
diff --git a/WWW/Library/Implementation/HTDOS.c b/WWW/Library/Implementation/HTDOS.c
index 79a8b463..c5687808 100644
--- a/WWW/Library/Implementation/HTDOS.c
+++ b/WWW/Library/Implementation/HTDOS.c
@@ -1,25 +1,25 @@
-/*             DOS specific routines
-
- */
-
-#include <mem.h>
-#include <dos.h>
-#include "htstring.h"
-
-/* PUBLIC                                                       HTDOS_wwwName()
-**              CONVERTS DOS Name into WWW Name
-** ON ENTRY:
-**      dosname         DOS file specification (NO NODE)
-**
-** ON EXIT:
-**	returns 	www file specification
-**
-*/
-char * HTDOS_wwwName (char *dosname)
-{
-	static char wwwname[1024];
-	char *cp_url = wwwname;
-
+/*             DOS specific routines
+
+ */
+
+#include <mem.h>
+#include <dos.h>
+#include "htstring.h"
+
+/* PUBLIC                                                       HTDOS_wwwName()
+**              CONVERTS DOS Name into WWW Name
+** ON ENTRY:
+**      dosname         DOS file specification (NO NODE)
+**
+** ON EXIT:
+**	returns 	www file specification
+**
+*/
+char * HTDOS_wwwName (char *dosname)
+{
+	static char wwwname[1024];
+	char *cp_url = wwwname;
+
 	strcpy(wwwname,dosname);
 
 	for ( ; *cp_url != '\0' ; cp_url++)
@@ -36,25 +36,25 @@ char * HTDOS_wwwName (char *dosname)
 
 /*
 	if((strlen(wwwname)>2)&&(wwwname[1]==':')) wwwname[1]='|';
-	printf("\n\nwww: %s\n\ndos: %s\n\n",wwwname,dosname);
-	sleep(5);
-*/
+	printf("\n\nwww: %s\n\ndos: %s\n\n",wwwname,dosname);
+	sleep(5);
+*/
 	return(wwwname);
-}
-
-
-/* PUBLIC                                                       HTDOS_name()
-**              CONVERTS WWW name into a DOS name
-** ON ENTRY:
-**	fn		WWW file name
-**
-** ON EXIT:
-**      returns         dos file specification
-**
-** Bug:	Returns pointer to static -- non-reentrant
-*/
-char * HTDOS_name(char *dosname)	{
-
+}
+
+
+/* PUBLIC                                                       HTDOS_name()
+**              CONVERTS WWW name into a DOS name
+** ON ENTRY:
+**	fn		WWW file name
+**
+** ON EXIT:
+**      returns         dos file specification
+**
+** Bug:	Returns pointer to static -- non-reentrant
+*/
+char * HTDOS_name(char *dosname)	{
+
 	static char cp_url[1024];
 	int joe;
 
@@ -92,4 +92,4 @@ char * HTDOS_name(char *dosname)	{
 	}
 }
 
-
+
diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c
index 3cf9dd3d..a3ad9de1 100644
--- a/WWW/Library/Implementation/HTFTP.c
+++ b/WWW/Library/Implementation/HTFTP.c
@@ -129,7 +129,7 @@ typedef struct _connection {
 
 #define PUTC(c) (*targetClass.put_character)(target, c)
 #define PUTS(s) (*targetClass.put_string)(target, s)
-#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0)
+#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*targetClass.end_element)(target, e, 0)
 #define FREE_TARGET (*targetClass._free)(target)
 #define ABORT_TARGET (*targetClass._free)(target)
@@ -2143,17 +2143,19 @@ PRIVATE EntryInfo * parse_dir_entry ARGS2(
     **  Get real types eventually.
     */
     if (!entry_info->type) {
-	char *cp;
+	CONST char *cp;
         HTFormat format;
         HTAtom * encoding;  /* @@ not used at all */
-        format = HTFileFormat(entry_info->filename, &encoding);
+        format = HTFileFormat(entry_info->filename, &encoding, &cp);
 
-	if (!strncmp(HTAtom_name(format), "application",11)) {
-	    cp = HTAtom_name(format) + 12;
-	    if (!strncmp(cp,"x-",2))
-		cp += 2;
-	} else {
-	    cp = HTAtom_name(format);
+	if (cp == NULL) {
+	    if (!strncmp(HTAtom_name(format), "application",11)) {
+		cp = HTAtom_name(format) + 12;
+		if (!strncmp(cp,"x-",2))
+		    cp += 2;
+	    } else {
+		cp = HTAtom_name(format);
+	    }
 	}
 
         StrAllocCopy(entry_info->type, cp);
@@ -2574,6 +2576,7 @@ PUBLIC int HTFTPLoad ARGS4(
 	HTStream *,		sink)
 {
     BOOL isDirectory = NO;
+    HTAtom * encoding = NULL;
     int status;
     int retry;			/* How many times tried? */
     HTFormat format;
@@ -2689,7 +2692,6 @@ PUBLIC int HTFTPLoad ARGS4(
         char *filename = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
 	char *fname = filename;	/** Save for subsequent free() **/
 	BOOL binary;
-	HTAtom * encoding;
 	char *type = NULL;
 	char *cp;
 
@@ -2795,16 +2797,16 @@ PUBLIC int HTFTPLoad ARGS4(
 		    (cp > (filename + 3) &&
 	             0 == strncasecomp((cp - 4), "read.me", 7))) {
 		    *cp = '\0';
-		    format = HTFileFormat(filename, &encoding);
+		    format = HTFileFormat(filename, &encoding, NULL);
 		    *cp = '.';
 		} else {
-		    format = HTFileFormat(filename, &encoding);
+		    format = HTFileFormat(filename, &encoding, NULL);
 		}
 	    } else {
-	        format = HTFileFormat(filename, &encoding);
+	        format = HTFileFormat(filename, &encoding, NULL);
 	    }
 	} else {
-	    format = HTFileFormat(filename, &encoding);
+	    format = HTFileFormat(filename, &encoding, NULL);
 	}
 	format = HTCharsetFormat(format, anchor, -1);
 	binary = (encoding != HTAtom_for("8bit") &&
@@ -3218,7 +3220,6 @@ listen:
     } else {
         int rv;
 	int len;
-	HTAtom * encoding;
 	char *FileName = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
 
 	/** Clear any login messages **/
@@ -3226,14 +3227,24 @@ listen:
 
 	/** Fake a Content-Encoding for compressed files. - FM **/
 	HTUnEscape(FileName);
-	if ((len = strlen(FileName)) > 2) {
+	if (!IsUnityEnc(encoding)) {
+	    /*
+	     *  We already know from the call to HTFileFormat above that
+	     *  this is a compressed file, no need to look at the filename
+	     *  again. - kw
+	     */
+	    StrAllocCopy(anchor->content_type, format->name);
+	    StrAllocCopy(anchor->content_encoding, HTAtom_name(encoding));
+	    format = HTAtom_for("www/compressed");
+
+	} else if ((len = strlen(FileName)) > 2) {
 	    if ((FileName[len - 1] == 'Z') &&
 	        (FileName[len - 2] == '.' ||
 		 FileName[len - 2] == '-' ||
 		 FileName[len - 2] == '_')) {
 		
 		FileName[len - 2] = '\0';
-		format = HTFileFormat(FileName, &encoding);
+		format = HTFileFormat(FileName, &encoding, NULL);
 		format = HTCharsetFormat(format, anchor, -1);
 		StrAllocCopy(anchor->content_type, format->name);
 		StrAllocCopy(anchor->content_encoding, "x-compress");
@@ -3244,7 +3255,7 @@ listen:
 		    FileName[len - 3] == '-' ||
 		    FileName[len - 3] == '_') {
 		    FileName[len - 3] = '\0';
-		    format = HTFileFormat(FileName, &encoding);
+		    format = HTFileFormat(FileName, &encoding, NULL);
 		    format = HTCharsetFormat(format, anchor, -1);
 		    StrAllocCopy(anchor->content_type, format->name);
 		    StrAllocCopy(anchor->content_encoding, "x-gzip");
diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c
index 71ea8823..93553bcb 100644
--- a/WWW/Library/Implementation/HTFile.c
+++ b/WWW/Library/Implementation/HTFile.c
@@ -87,6 +87,7 @@ typedef struct _HTSuffix {
 	char *		suffix;
 	HTAtom *	rep;
 	HTAtom *	encoding;
+        char *		desc;
 	float		quality;
 } HTSuffix;
 
@@ -102,7 +103,7 @@ typedef struct _HTSuffix {
 
 #define PUTC(c) (*target->isa->put_character)(target, c)
 #define PUTS(s) (*target->isa->put_string)(target, s)
-#define START(e) (*target->isa->start_element)(target, e, 0, 0, 0)
+#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*target->isa->end_element)(target, e, 0)
 #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
                         (*target->isa->end_element)(target, e, 0)
@@ -148,8 +149,8 @@ PRIVATE char *HTCacheRoot = "/tmp/W3_Cache_";	/* Where to cache things */
 **  Suffix registration.
 */
 PRIVATE HTList * HTSuffixes = 0;
-PRIVATE HTSuffix no_suffix = { "*", NULL, NULL, 1.0 };
-PRIVATE HTSuffix unknown_suffix = { "*.*", NULL, NULL, 1.0};
+PRIVATE HTSuffix no_suffix = { "*", NULL, NULL, NULL, 1.0 };
+PRIVATE HTSuffix unknown_suffix = { "*.*", NULL, NULL, NULL, 1.0};
 
 
 #ifdef _WINDOWS
@@ -354,16 +355,21 @@ PRIVATE void LYListFmtParse ARGS5(
 **	Calling this with suffix set to "*.*" will set the default
 **	representation for unknown suffix files which contain a ".".
 **
-**	If filename suffix is already defined its previous
-**	definition is overridden.
+**	The encoding parameter can give a trivial (8bit, 7bit, binary)
+**	or real (gzip, compress) encoding.
+**
+**	If filename suffix is already defined with the same encoding
+**	its previous definition is overridden.
 */
-PUBLIC void HTSetSuffix ARGS4(
+PUBLIC void HTSetSuffix5 ARGS5(
 	CONST char *,	suffix,
 	CONST char *,	representation,
 	CONST char *,	encoding,
+	CONST char *,	desc,
 	float,		value)
 {
     HTSuffix * suff;
+    BOOL trivial_enc = IsUnityEncStr(encoding);
 
     if (strcmp(suffix, "*") == 0)
         suff = &no_suffix;
@@ -373,7 +379,10 @@ PUBLIC void HTSetSuffix ARGS4(
 	HTList *cur = HTSuffixes;
 
 	while (NULL != (suff = (HTSuffix*)HTList_nextObject(cur))) {
-	    if (suff->suffix && 0 == strcmp(suff->suffix, suffix))
+	    if (suff->suffix && 0 == strcmp(suff->suffix, suffix) &&
+		((trivial_enc && IsUnityEnc(suff->encoding)) ||
+		 (!trivial_enc && !IsUnityEnc(suff->encoding) &&
+		     strcmp(encoding, HTAtom_name(suff->encoding)) == 0)))
 		break;
 	}
 	if (!suff) { /* Not found -- create a new node */
@@ -396,7 +405,8 @@ PUBLIC void HTSetSuffix ARGS4(
 	}
     }
 
-    suff->rep = HTAtom_for(representation);
+    if (representation)
+	suff->rep = HTAtom_for(representation);
    
     /*
     **	Memory leak fixed.
@@ -404,7 +414,9 @@ PUBLIC void HTSetSuffix ARGS4(
     **	Invariant code removed.
     */
     suff->encoding = HTAtom_for(encoding);
-    
+
+    StrAllocCopy(suff->desc, desc);
+
     suff->quality = value;
 }
 
@@ -430,6 +442,7 @@ PRIVATE void free_suffixes NOARGS
 	*/
 	suff = (HTSuffix *)HTList_removeLastObject(HTSuffixes);
 	FREE(suff->suffix);
+	FREE(suff->desc);
 	FREE(suff);
     }
     /*
@@ -643,15 +656,21 @@ PUBLIC char * WWW_nameOfFile ARGS1(
 **
 **  On entry,
 **	rep	is the atomized MIME style representation
+**	enc	is an encoding, trivial (8bit, binary, etc.) or gzip etc.
 **
 **  On exit:
 **	Returns	a pointer to a suitable suffix string if one has been
 **	found, else "".
 */
-PUBLIC CONST char * HTFileSuffix ARGS1(
-	HTAtom*,	rep)
+PUBLIC CONST char * HTFileSuffix ARGS2(
+	HTAtom*,	rep,
+	CONST char *,	enc)
 {
     HTSuffix * suff;
+#ifdef FNAMES_8_3
+    HTSuffix * first_found = NULL;
+#endif
+    BOOL trivial_enc;
     int n;
     int i;
 
@@ -660,13 +679,44 @@ PUBLIC CONST char * HTFileSuffix ARGS1(
     if (!HTSuffixes)
         HTFileInit();
 #endif /* !NO_INIT */
+
+    trivial_enc = IsUnityEncStr(enc);
     n = HTList_count(HTSuffixes);
     for (i = 0; i < n; i++) {
 	suff = (HTSuffix *)HTList_objectAt(HTSuffixes, i);
-	if (suff->rep == rep) {
+	if (suff->rep == rep &&
+#if defined(VMS) || defined(FNAMES_8_3)
+	    /*  On these systems, skip a suffix that starts with a dot and
+		contains a further dot or an asterisk after it; any other
+		suffix passes this filter - kw */
+	    (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' ||
+	     (strchr(suff->suffix + 1, '.') == NULL &&
+	      strchr(suff->suffix + 1, '*') == NULL)) &&
+#endif
+	    ((trivial_enc && IsUnityEnc(suff->encoding)) ||
+	     (!trivial_enc && !IsUnityEnc(suff->encoding) &&
+	      strcmp(enc, HTAtom_name(suff->encoding)) == 0))) {
+#ifdef FNAMES_8_3
+	    if (suff->suffix && (strlen(suff->suffix) <= 4)) {
+		/*
+		 *  If length of suffix (including dot) is 4 or smaller,
+		 *  return this one even if we found a longer one
+		 *  earlier - kw
+		 */
+		return suff->suffix;
+	    } else if (!first_found) {
+		first_found = suff; 		/* remember this one */
+	    }
+#else
 	    return suff->suffix;		/* OK -- found */
+#endif
 	}
     }
+#ifdef FNAMES_8_3
+    if (first_found)
+	return first_found->suffix;
+    else
+#endif
     return "";		/* Dunno */
 }
 
@@ -676,11 +726,15 @@ PUBLIC CONST char * HTFileSuffix ARGS1(
 **	This version will return the representation and also set
 **	a variable for the encoding.
 **
+**	Encoding may be a unity encoding (binary, 8bit, etc.) or
+**      a content-coding like gzip, compress.
+**
 **	It will handle for example  x.txt, x.txt,Z, x.Z
 */
-PUBLIC HTFormat HTFileFormat ARGS2(
+PUBLIC HTFormat HTFileFormat ARGS3(
 	CONST char *,	filename,
-	HTAtom **,	pencoding)
+	HTAtom **,	pencoding,
+	CONST char**,	pdesc)
 {
     HTSuffix * suff;
     int n;
@@ -691,7 +745,13 @@ PUBLIC HTFormat HTFileFormat ARGS2(
 #endif /* VMS */
     extern char LYforce_HTML_mode;
 
+    if (pencoding)
+	*pencoding = NULL;
+    if (pdesc)
+	*pdesc = NULL;
     if (LYforce_HTML_mode) {
+	if (pencoding)
+	    *pencoding = WWW_ENC_8BIT;
         return WWW_HTML;
     }
 
@@ -709,8 +769,6 @@ PUBLIC HTFormat HTFileFormat ARGS2(
     if (!HTSuffixes)
         HTFileInit();
 #endif /* !NO_INIT */
-    if (pencoding)
-	*pencoding = NULL;
     lf  = strlen(filename);
     n = HTList_count(HTSuffixes);
     for (i = 0; i < n; i++) {
@@ -721,6 +779,8 @@ PUBLIC HTFormat HTFileFormat ARGS2(
 	    int j;
 	    if (pencoding)
 		*pencoding = suff->encoding;
+	    if (pdesc)
+		*pdesc = suff->desc;
 	    if (suff->rep) {
 #ifdef VMS
 		if (semicolon != NULL)
@@ -732,9 +792,11 @@ PUBLIC HTFormat HTFileFormat ARGS2(
 		int ls2;
 		suff = (HTSuffix *)HTList_objectAt(HTSuffixes, j);
 		ls2 = strlen(suff->suffix);
-		if ((ls <= lf) && 0 == strncasecomp(
+		if ((ls + ls2 <= lf) && 0 == strncasecomp(
 			suff->suffix, filename + lf - ls -ls2, ls2)) {
 		    if (suff->rep) {
+			if (pdesc && !(*pdesc))
+			    *pdesc = suff->desc;
 #ifdef VMS
 			if (semicolon != NULL)
 			    *semicolon = ';';
@@ -1419,7 +1481,8 @@ PUBLIC int HTLoadFile ARGS4(
     HTFormat format;
     char * nodename = NULL;
     char * newname = NULL;	/* Simplified name of file */
-    HTAtom * encoding;		/* @@ not used yet */
+    HTAtom * encoding;		/* @@ not used */
+    HTAtom * myEncoding = NULL;	/* enc of this file, may be gzip etc. */
     int status;
 #ifdef VMS
     struct stat stat_info;
@@ -1466,17 +1529,25 @@ PUBLIC int HTLoadFile ARGS4(
     /*
     **  Determine the format and encoding mapped to any suffix.
     */
-    format = HTFileFormat(filename, &encoding);
-
+    if (anchor->content_type && anchor->content_encoding) {
+	/*
+	 *  If content_type and content_encoding are BOTH already set
+	 *  in the anchor object, we believe it and don't try to
+	 *  derive format and encoding from the filename. - kw
+	 */
+	format = HTAtom_for(anchor->content_type);
+	myEncoding = HTAtom_for(anchor->content_encoding);
+    } else {
+	format = HTFileFormat(filename, &myEncoding, NULL);
+    
     /*
     **  Check the format for an extended MIME charset value, and
-    **  act on it if present.  Otherwise, assume the ISO-8859-1
-    **  character set for local files.  If it's actually another
-    **  charset (e.g., ISO-8859-2 or KOI8-R) and the terminal is
-    **  using that, Lynx users should make the current character
-    **  set "ISO Latin 1" so that 8-bit characters are passed raw.
+    **  act on it if present.  Otherwise, assume what is indicated
+    **  by the last parameter (fallback will effectively be
+    **  UCLYhndl_for_unspec, by default ISO-8859-1). - kw
     */
-    format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec);
+	format = HTCharsetFormat(format, anchor, UCLYhndl_HTFile_for_unspec);
+    }
 
 #ifdef VMS
     /*
@@ -1576,7 +1647,7 @@ PUBLIC int HTLoadFile ARGS4(
 		    vmsname[len - 3] != ':') {
 		    StrAllocCopy(cp, vmsname);
 		    cp[len - 2] = '\0';
-		    format = HTFileFormat(cp, &encoding);
+		    format = HTFileFormat(cp, &encoding, NULL);
 		    FREE(cp);
 		    format = HTCharsetFormat(format, anchor,
 					     UCLYhndl_HTFile_for_unspec);
@@ -1590,7 +1661,7 @@ PUBLIC int HTLoadFile ARGS4(
 			vmsname[len - 3] == '_') {
 			StrAllocCopy(cp, vmsname);
 			cp[len - 3] = '\0';
-			format = HTFileFormat(cp, &encoding);
+			format = HTFileFormat(cp, &encoding, NULL);
 			FREE(cp);
 			format = HTCharsetFormat(format, anchor,
 						 UCLYhndl_HTFile_for_unspec);
@@ -1692,6 +1763,7 @@ PUBLIC int HTLoadFile ARGS4(
 	    STRUCT_DIRENT * dirbuf;
 	    float best = NO_VALUE_FOUND;	/* So far best is bad */
 	    HTFormat best_rep = NULL;	/* Set when rep found */
+	    HTAtom * best_enc = NULL;
 	    char * best_name = NULL;	/* Best dir entry so far */
 
 	    char *base = strrchr(localname, '/');
@@ -1722,8 +1794,9 @@ PUBLIC int HTLoadFile ARGS4(
 		    continue;	/* if the entry is not being used, skip it */
 #endif
 		if ((int)strlen(dirbuf->d_name) > baselen &&     /* Match? */
-		    !strncmp(dirbuf->d_name, base, baselen)) {	
-		    HTFormat rep = HTFileFormat(dirbuf->d_name, NULL);
+		    !strncmp(dirbuf->d_name, base, baselen)) {
+		    HTAtom * enc;
+		    HTFormat rep = HTFileFormat(dirbuf->d_name, &enc, NULL);
 		    float filevalue = HTFileValue(dirbuf->d_name);
 		    float value = HTStackValue(rep, format_out,
 		    				filevalue,
@@ -1731,12 +1804,13 @@ PUBLIC int HTLoadFile ARGS4(
 		    if (value <= 0.0) {
 			char * cp = NULL;
 			int len = strlen(dirbuf->d_name);
+			enc = NULL;
     			if (len > 2 &&
 			    dirbuf->d_name[len - 1] == 'Z' &&
 			    dirbuf->d_name[len - 2] == '.') {
 			    StrAllocCopy(cp, dirbuf->d_name);
 			    cp[len - 2] = '\0';
-			    format = HTFileFormat(cp, NULL);
+			    format = HTFileFormat(cp, NULL, NULL);
 			    FREE(cp);
 			    value = HTStackValue(format, format_out,
 						 filevalue, 0);
@@ -1756,7 +1830,7 @@ PUBLIC int HTLoadFile ARGS4(
 				   dirbuf->d_name[len - 3] == '.') {
 			    StrAllocCopy(cp, dirbuf->d_name);
 			    cp[len - 3] = '\0';
-			    format = HTFileFormat(cp, NULL);
+			    format = HTFileFormat(cp, NULL, NULL);
 			    FREE(cp);
 			    value = HTStackValue(format, format_out,
 						 filevalue, 0);
@@ -1779,6 +1853,7 @@ PUBLIC int HTLoadFile ARGS4(
 				    HTAtom_name(rep), value);
 			if  (value > best) {
 			    best_rep = rep;
+			    best_enc = enc;
 			    best = value;
 			    StrAllocCopy(best_name, dirbuf->d_name);
 		       }
@@ -1790,6 +1865,7 @@ PUBLIC int HTLoadFile ARGS4(
 	    
 	    if (best_rep) {
 		format = best_rep;
+		myEncoding = best_enc;
 		base[-1] = '/';		/* Restore directory name */
 		base[0] = '\0';
 		StrAllocCat(localname, best_name);
@@ -2188,12 +2264,37 @@ PUBLIC int HTLoadFile ARGS4(
 		/*
 		**  Fake a Content-Encoding for compressed files. - FM
 		*/
-		if ((len = strlen(localname)) > 2) {
+		if (!IsUnityEnc(myEncoding)) {
+		    /*
+		     *  We already know from the call to HTFileFormat above
+		     *  that this is a compressed file, no need to look at
+		     *  the filename again. - kw
+		     */
+#ifdef USE_ZLIB
+		    if (strcmp(format_out->name, "www/download") != 0 &&
+			(!strcmp(HTAtom_name(myEncoding), "gzip") ||
+			 !strcmp(HTAtom_name(myEncoding), "x-gzip"))) {
+			fclose(fp);
+			gzfp = gzopen(localname, "rb");
+
+			if (TRACE)
+			    fprintf(stderr,
+				    "HTLoadFile: gzopen of `%s' gives %p\n",
+				    localname, (void*)gzfp);
+			use_gzread = YES;
+		    } else
+#endif  /* USE_ZLIB */
+		    {
+			StrAllocCopy(anchor->content_type, format->name);
+			StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding));
+			format = HTAtom_for("www/compressed");
+		    }
+		} else if ((len = strlen(localname)) > 2) {
 		    if (localname[len - 1] == 'Z' &&
 		        localname[len - 2] == '.') {
 			StrAllocCopy(cp, localname);
 			cp[len - 2] = '\0';
-			format = HTFileFormat(cp, &encoding);
+			format = HTFileFormat(cp, &encoding, NULL);
 			FREE(cp);
 			format = HTCharsetFormat(format, anchor,
 						 UCLYhndl_HTFile_for_unspec);
@@ -2206,7 +2307,7 @@ PUBLIC int HTLoadFile ARGS4(
 			       localname[len - 3] == '.') {
 			StrAllocCopy(cp, localname);
 			cp[len - 3] = '\0';
-			format = HTFileFormat(cp, &encoding);
+			format = HTFileFormat(cp, &encoding, NULL);
 			FREE(cp);
 			format = HTCharsetFormat(format, anchor,
 						 UCLYhndl_HTFile_for_unspec);
diff --git a/WWW/Library/Implementation/HTFile.h b/WWW/Library/Implementation/HTFile.h
index 2d37a537..e6a7926a 100644
--- a/WWW/Library/Implementation/HTFile.h
+++ b/WWW/Library/Implementation/HTFile.h
@@ -90,19 +90,31 @@ extern void HTDirEntry PARAMS((
 **  representation	is MIME-style content-type
 **
 **  encoding		is MIME-style content-transfer-encoding
-**			(8bit, 7bit, etc)
+**			(8bit, 7bit, etc) or HTTP-style content-encoding
+**			(gzip, compress etc.)
 **
 **  quality		an a priori judgement of the quality of such files
 **			(0.0..1.0)
 **
-**  Example:   HTSetSuffix(".ps", "application/postscript", "8bit", 1.0);
+**  HTSetSuffix5 has one more parameter for a short description of the type
+**  which is otherwise derived from the representation:
+**
+**  desc		is a short textual description, or NULL
+**
+**  Examples:   HTSetSuffix(".ps", "application/postscript", "8bit", 1.0);
+**  Examples:   HTSetSuffix(".psz", "application/postscript", "gzip", 1.0);
+**  A MIME type could also indicate a non-trivial encoding on its own
+**  ("application/x-compressed-tar"), but in that case don't use encoding
+**  to also indicate it but use "binary" etc.
 */
-extern void HTSetSuffix PARAMS((
+extern void HTSetSuffix5 PARAMS((
         CONST char *    suffix,
         CONST char *    representation,
         CONST char *    encoding,
+        CONST char *    desc,
         float           quality));
-        
+
+#define HTSetSuffix(suff,rep,enc,q) HTSetSuffix5(suff, rep, enc, NULL, q)
 
 /*
 **  HTFileFormat: Get Representation and Encoding from file name.
@@ -115,7 +127,8 @@ extern void HTSetSuffix PARAMS((
 */
 extern HTFormat HTFileFormat PARAMS((
 	CONST char *		filename,
-	HTAtom **		pEncoding));
+	HTAtom **		pEncoding,
+	CONST char **		pDesc));
 
 /*
 **  HTCharsetFormat: Revise the file format in relation to the Lynx charset.
@@ -155,6 +168,7 @@ extern BOOL HTEditable PARAMS((CONST char * filename));
 **  ON ENTRY,
 **
 **  rep			is the atomized MIME style representation
+**  enc			is an encoding (8bit, binary, gzip, compress,..)
 **
 **  ON EXIT,
 **
@@ -162,7 +176,8 @@ extern BOOL HTEditable PARAMS((CONST char * filename));
 **			been found, else NULL.
 */
 extern CONST char * HTFileSuffix PARAMS((
-                HTAtom* rep));
+                HTAtom* rep,
+                CONST char* enc));
 
 /*
 **  The Protocols
diff --git a/WWW/Library/Implementation/HTFinger.c b/WWW/Library/Implementation/HTFinger.c
index d00b57cb..06188d2b 100644
--- a/WWW/Library/Implementation/HTFinger.c
+++ b/WWW/Library/Implementation/HTFinger.c
@@ -46,7 +46,7 @@
 
 #define PUTC(c) (*targetClass.put_character)(target, c)
 #define PUTS(s) (*targetClass.put_string)(target, s)
-#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0)
+#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*targetClass.end_element)(target, e, 0)
 #define FREE_TARGET (*targetClass._free)(target)
 #define NEXT_CHAR HTGetCharacter() 
@@ -91,7 +91,7 @@ PRIVATE void start_anchor ARGS1(CONST char *,  href)
     }
     ((CONST char **)value)[HTML_A_HREF] = href;
     (*targetClass.start_element)(target, HTML_A, present,
-    				 (CONST char **)value, 0);
+    				 (CONST char **)value, -1, 0);
 
 }
 
diff --git a/WWW/Library/Implementation/HTFormat.c b/WWW/Library/Implementation/HTFormat.c
index b5f8584d..3b33fb9a 100644
--- a/WWW/Library/Implementation/HTFormat.c
+++ b/WWW/Library/Implementation/HTFormat.c
@@ -254,6 +254,7 @@ PUBLIC char HTGetCharacter NOARGS
     return FROMASCII(ch);
 }
 
+#ifdef NOT_USED
 /*	Stream the data to an ouput file as binary
 */
 PUBLIC int HTOutputBinary ARGS2( int, 		input,
@@ -271,6 +272,7 @@ PUBLIC int HTOutputBinary ARGS2( int, 		input,
 	    fwrite(input_buffer, sizeof(char), status, output);
     } while (YES);
 }
+#endif /* NOT_USED */
 
 /*  Match maintype to any MIME type starting with maintype,
  *  for example:  image/gif should match image
diff --git a/WWW/Library/Implementation/HTFormat.h b/WWW/Library/Implementation/HTFormat.h
index 812e2fc3..b21a0ec3 100644
--- a/WWW/Library/Implementation/HTFormat.h
+++ b/WWW/Library/Implementation/HTFormat.h
@@ -125,6 +125,19 @@ typedef HTAtom* HTEncoding;
  */
 #define WWW_ENC_COMPRESS        HTAtom_for("compress")
 
+/*
+   Does a string designate a real encoding, or is it just
+   a "dummy" as for example 7bit, 8bit, and binary?
+  */
+#define IsUnityEncStr(senc) \
+        ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\
+        !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit"))
+
+#define IsUnityEnc(enc) \
+        ((enc)==NULL || (enc)==HTAtom_for("identity") ||\
+        (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT)
+
+
 #include "HTAnchor.h"
 
 /*
diff --git a/WWW/Library/Implementation/HTGopher.c b/WWW/Library/Implementation/HTGopher.c
index 7af74580..c94c89d8 100644
--- a/WWW/Library/Implementation/HTGopher.c
+++ b/WWW/Library/Implementation/HTGopher.c
@@ -78,7 +78,7 @@
 
 #define PUTC(c) (*targetClass.put_character)(target, c)
 #define PUTS(s) (*targetClass.put_string)(target, s)
-#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0)
+#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*targetClass.end_element)(target, e, 0)
 #define FREE_TARGET (*targetClass._free)(target)
 
@@ -201,7 +201,7 @@ PRIVATE void write_anchor ARGS2(CONST char *,text, CONST char *,addr)
     
     HT_Is_Gopher_URL = TRUE;  /* tell HTML.c that this is a Gopher URL */
     (*targetClass.start_element)(target, HTML_A, present,
-    				 (CONST char **)value, 0);
+    				 (CONST char **)value, -1, 0);
 	    
     PUTS(text);
     END(HTML_A);
@@ -1773,7 +1773,7 @@ PUBLIC int HTLoadGopher ARGS4(
         int len;
 
 	if ((len = strlen(arg)) > 5) {
-	    if (0 == strcmp(&arg[len-6], ":105/2")) {
+	    if (0 == strcmp((CONST char *)&arg[len-6], ":105/2")) {
 	        /* Use CSO gateway. */
 		if (TRACE)
 		    fprintf(stderr, "HTGopher: Passing to CSO/PH gateway.\n");
diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c
index db6d75b7..0d5c6fdd 100644
--- a/WWW/Library/Implementation/HTMIME.c
+++ b/WWW/Library/Implementation/HTMIME.c
@@ -2292,26 +2292,60 @@ PUBLIC void HTmmdec_quote ARGS2(
     strcpy(t, buf);
 }
 
+/* Generalized HTmmdecode for chartrans - kweide 1997-03-06 */
+
 PUBLIC void HTmmdecode ARGS2(
 	char *,		trg,
 	char *,		str)
 {
     char buf[BUFLEN], mmbuf[BUFLEN];
-    char *s, *t, *u;
+    char *s, *t, *u, *qm2;
     int  base64, quote;
 
     buf[0] = '\0';
 
+/* encoded-words look like  =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=  */
+
     for (s = str, u = buf; *s; ) {
-	if (!strncasecomp(s, "=?ISO-2022-JP?B?", 16)) {
-	    base64 = 1;
-	} else {
-	    base64 = 0;
-	}
-	if (!strncasecomp(s, "=?ISO-2022-JP?Q?", 16)) {
-	    quote = 1;
-	} else {
-	    quote = 0;
+	base64 = quote = 0;
+	if (*s == '=' && s[1] == '?' &&
+	    (s == str || *(s-1) == '(' || WHITE(*(s-1))))
+	{ /* must be beginning of word */
+	    qm2 = strchr(s+2, '?'); /* 2nd question mark */
+	    if (qm2 &&
+		(qm2[1] == 'B' || qm2[1] == 'b' || qm2[1] == 'Q' || qm2[1] == 'q') &&
+		qm2[2] == '?') { /* 3rd question mark */
+		char * qm4 = strchr(qm2 + 3, '?'); /* 4th question mark */
+		if (qm4 && qm4 - s < 74 &&  /* RFC 2047 length restriction */
+		    qm4[1] == '=') {
+		    char *p;
+		    BOOL invalid = NO;
+		    for (p = s+2; p < qm4; p++)
+			if (WHITE(*p)) {
+			    invalid = YES;
+			    break;
+			}
+		    if (!invalid) {
+			int LYhndl;
+			*qm2 = '\0';
+#ifdef EXP_CHARTRANS
+			for (p = s+2; *p; p++)
+			    *p = TOLOWER(*p);
+			invalid = ((LYhndl = UCGetLYhndl_byMIME(s+2)) < 0 ||
+				   !UCCanTranslateFromTo(LYhndl, current_char_set));
+#else
+			invalid = (0!=strncasecomp(s+2, "ISO-2022-JP", 11));
+#endif
+			*qm2 = '?';
+		    }
+		    if (!invalid) {
+			if (qm2[1] == 'B' || qm2[1] == 'b')
+			    base64 = 1;
+			else if (qm2[1] == 'Q' || qm2[1] == 'q')
+			    quote = 1;
+		    }
+		}
+	    }
 	}
 	if (base64 || quote) {
 	    if (HTmmcont) {
@@ -2320,7 +2354,7 @@ PUBLIC void HTmmdecode ARGS2(
 			u--;
 		}
 	    }
-	    for (s += 16, t = mmbuf; *s; ) {
+	    for (s = qm2 + 3, t = mmbuf; *s; ) {
 		if (s[0] == '?' && s[1] == '=') { 
 		    break;
 		} else {
@@ -2461,7 +2495,7 @@ PUBLIC int HTrjis ARGS2(
 */
 /*
  * RJIS ( Recover JIS code from broken file )
- * $Header: /usr/build/VCS/lynx/WWW/Library/Implementation/RCS/HTMIME.c,v 1.10 1997/09/19 01:14:00 klaus Exp $
+ * @Header: rjis.c,v 0.2 92/09/04 takahasi Exp @
  * Copyright (C) 1992 1994
  * Hironobu Takahashi (takahasi@tiny.or.jp)
  *
diff --git a/WWW/Library/Implementation/HTML.h b/WWW/Library/Implementation/HTML.h
index 7480b678..f1653cf1 100644
--- a/WWW/Library/Implementation/HTML.h
+++ b/WWW/Library/Implementation/HTML.h
@@ -76,6 +76,8 @@ struct _HTStructured {
     BOOL			select_disabled;
     HTChunk			textarea;	/* Grow by 128 */
     char *			textarea_name;
+    int				textarea_name_cs;
+    char *			textarea_accept_cs;
     char *			textarea_cols;
     int 			textarea_rows;
     int				textarea_disabled;
@@ -148,6 +150,7 @@ struct _HTStructured {
     LYUCcharset	* UCI;	/* pointer to node_anchor's UCInfo */
     int	UCLYhndl;		/* tells us what charset we are fed */
     UCTransParams T;
+    int 		tag_charset; /* charset for attribute values etc. */
 #endif
 };
 
diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c
index 7eb1d86b..cef1eaef 100644
--- a/WWW/Library/Implementation/HTMLDTD.c
+++ b/WWW/Library/Implementation/HTMLDTD.c
@@ -135,9 +135,8 @@ static CONST char* entities[] = {
 /* 	Extra Entity Names
 **	------------------
 **
-**	This table contains Unicodes in addition to the Names.
+**	This table contains Unicodes in addition to the Names. - kw
 **
-**      Just an idea how it could be done. -kw
 *
 *	I think in the future the whole entities[] thing above could migrate
 *	to this kind of structure.  The structured streams to which
@@ -160,137 +159,137 @@ static CONST char* entities[] = {
                      - lrm, rlm, zwnj and zwj 
 */
 static CONST UC_entity_info extra_entities[] = {
-  {"Aacute",  0x00c1},  /* A with acute */
-  {"Abreve",  0x0102},  /* A with breve */
-  {"Acirc",  0x00c2},  /* A with circumflex */
-  {"Aogon",  0x0104},  /* A with ogonek */
-  {"Auml",  0x00c4},  /* A with diaeresis */
-  {"Cacute",  0x0106},  /* C with acute */
-  {"Ccaron",  0x010c},  /* C with caron */
-  {"Ccedil",  0x00c7},  /* C with cedilla */
-  {"Dcaron",  0x010e},  /* D with caron */
-  {"Dstrok",  0x0110},  /* D with stroke */
-  {"Eacute",  0x00c9},  /* E with acute */
-  {"Ecaron",  0x011a},  /* E with caron */
-  {"Eogon",  0x0118},  /* E with ogonek */
-  {"Euml",  0x00cb},  /* E with diaeresis */
-  {"Iacute",  0x00cd},  /* I with acute */
-  {"Icirc",  0x00ce},  /* I with circumflex */
-  {"Lacute",  0x0139},  /* L with acute */
-  {"Lcaron",  0x013d},  /* L with caron */
-  {"Lstrok",  0x0141},  /* L with stroke */
-  {"Nacute",  0x0143},  /* N with acute */
-  {"Ncaron",  0x0147},  /* N with caron */
-  {"Oacute",  0x00d3},  /* O with acute */
-  {"Ocirc",  0x00d4},  /* O with circumflex */
-  {"Odblac",  0x0150},  /* O with double acute */
-  {"Ouml",  0x00d6},  /* O with diaeresis */
-  {"Racute",  0x0154},  /* R with acute */
-  {"Rcaron",  0x0158},  /* R with caron */
-  {"Sacute",  0x015a},  /* S with acute */
-  {"Scaron",  0x0160},  /* S with caron */
-  {"Scedil",  0x015e},  /* S with cedilla */
-  {"Tcaron",  0x0164},  /* T with caron */
-  {"Tcedil",  0x0162},  /* T with cedilla */
-  {"Uacute",  0x00da},  /* U with acute */
-  {"Udblac",  0x0170},  /* U with double acute */
-  {"Uring",  0x016e},  /* U with ring above */
-  {"Uuml",  0x00dc},  /* U with diaeresis */
-  {"Yacute",  0x00dd},  /* Y with acute */
-  {"Zacute",  0x0179},  /* Z with acute */
-  {"Zcaron",  0x017d},  /* Z with caron */
-  {"Zdot",  0x017b},  /* Z with dot above */
-  {"aacute",  0x00e1},  /* a with acute */
-  {"abreve",  0x0103},  /* a with breve */
-  {"acirc",  0x00e2},  /* a with circumflex */
-  {"acute",  0x00b4},  /* acuteaccent */
-  {"amp",  0x0026},  /* ampersand */
-  {"aogon",  0x0105},  /* a with ogonek */
-  {"apos",  0x0027},  /* apostrophe */
-  {"ast",  0x002a},  /* asterisk */
-  {"auml",  0x00e4},  /* a with diaeresis */
-  {"breve",  0x02d8},  /* breve */
-  {"bsol",  0x005c},  /* reversesolidus */
-  {"cacute",  0x0107},  /* c with acute */
-  {"caron",  0x02c7},  /* caron */
-  {"ccaron",  0x010d},  /* c with caron */
-  {"ccedil",  0x00e7},  /* c with cedilla */
-  {"cedil",  0x00b8},  /* cedilla */
-  {"circ",  0x005e},  /* circumflexaccent */
-  {"colon",  0x003a},  /* colon */
-  {"comma",  0x002c},  /* comma */
-  {"commat",  0x0040},  /* commercialat */
-  {"curren",  0x00a4},  /* currencysign */
-  {"dblac",  0x02dd},  /* doubleacuteaccent */
-  {"dcaron",  0x010f},  /* d with caron */
-  {"deg",  0x00b0},  /* degreesign */
-  {"divide",  0x00f7},  /* divisionsign */
-  {"dollar",  0x0024},  /* dollarsign */
-  {"dot",  0x02d9},  /* dotabove */
-  {"dstrok",  0x0111},  /* d with stroke */
-  {"eacute",  0x00e9},  /* e with acute */
-  {"ecaron",  0x011b},  /* e with caron */
-  {"eogon",  0x0119},  /* e with ogonek */
-  {"equals",  0x003d},  /* equalssign */
-  {"euml",  0x00eb},  /* e with diaeresis */
-  {"excl",  0x0021},  /* exclamationmark */
-  {"grave",  0x0060},  /* graveaccent */
-  {"gt",  0x003e},  /* greater-thansign */
-  {"hyphen",  0x002d},  /* hyphen-minus */
-  {"iacute",  0x00ed},  /* i with acute */
-  {"icirc",  0x00ee},  /* i with circumflex */
-  {"lacute",  0x013a},  /* l with acute */
-  {"lcaron",  0x013e},  /* l with caron */
-  {"lcub",  0x007b},  /* leftcurlybracket */
-  {"lowbar",  0x005f},  /* lowline */
-  {"lpar",  0x0028},  /* leftparenthesis */
-  {"lrm",	8206},	/* left-to-right mark */ 
-  {"lsqb",  0x005b},  /* leftsquarebracket */
-  {"lstrok",  0x0142},  /* l with stroke */
-  {"lt",  0x003c},  /* less-thansign */
-  {"nacute",  0x0144},  /* n with acute */
-  {"nbsp",  0x00a0},  /* no-breakspace */
-  {"ncaron",  0x0148},  /* n with caron */
-  {"num",  0x0023},  /* numbersign */
-  {"oacute",  0x00f3},  /* o with acute */
-  {"ocirc",  0x00f4},  /* o with circumflex */
-  {"odblac",  0x0151},  /* o with double acute */
-  {"ogon",  0x02db},  /* ogonek */
-  {"ouml",  0x00f6},  /* o with diaeresis */
-  {"percnt",  0x0025},  /* percentsign */
-  {"period",  0x002e},  /* fullstop */
-  {"plus",  0x002b},  /* plussign */
-  {"quest",  0x003f},  /* questionmark */
-  {"quot",  0x0022},  /* quotationmark */
-  {"racute",  0x0155},  /* r with acute */
-  {"rcaron",  0x0159},  /* r with caron */
-  {"rcub",  0x007d},  /* rightcurlybracket */
-  {"rlm",	8207},	/* right-to-left mark */ 
-  {"rpar",  0x0029},  /* rightparenthesis */
-  {"rsqb",  0x005d},  /* rightsquarebracket */
-  {"sacute",  0x015b},  /* s with acute */
-  {"scaron",  0x0161},  /* s with caron */
-  {"scedil",  0x015f},  /* s with cedilla */
-  {"sect",  0x00a7},  /* sectionsign */
-  {"semi",  0x003b},  /* semicolon */
-  {"shy",  0x00ad},  /* softhyphen */
-  {"sol",  0x002f},  /* solidus */
-  {"tcaron",  0x0165},  /* t with caron */
-  {"tcedil",  0x0163},  /* t with cedilla */
-  {"tilde",  0x007e},  /* tilde */
-  {"times",  0x00d7},  /* multiplicationsign */
-  {"uacute",  0x00fa},  /* u with acute */
-  {"udblac",  0x0171},  /* u with double acute */
-  {"uml",  0x00a8},  /* diaeresis */
-  {"uring",  0x016f},  /* u with ring above */
-  {"uuml",  0x00fc},  /* u with diaeresis */
-  {"verbar",  0x007c},  /* verticalline */
-  {"yacute",  0x00fd},  /* y with acute */
-  {"zacute",  0x017a},  /* z with acute */
-  {"zcaron",  0x017e},  /* z with caron */
-  {"zdot",  0x017c},  /* z with dot above */
-  {"zwj",	8205},	/* zero width joiner */ 
-  {"zwnj",	8204},	/* zero width non-joiner */ 
+  {"Aacute",	0x00c1},  /* A with acute */
+  {"Abreve",	0x0102},  /* A with breve */
+  {"Acirc",	0x00c2},  /* A with circumflex */
+  {"Aogon",	0x0104},  /* A with ogonek */
+  {"Auml",	0x00c4},  /* A with diaeresis */
+  {"Cacute",	0x0106},  /* C with acute */
+  {"Ccaron",	0x010c},  /* C with caron */
+  {"Ccedil",	0x00c7},  /* C with cedilla */
+  {"Dcaron",	0x010e},  /* D with caron */
+  {"Dstrok",	0x0110},  /* D with stroke */
+  {"Eacute",	0x00c9},  /* E with acute */
+  {"Ecaron",	0x011a},  /* E with caron */
+  {"Eogon",	0x0118},  /* E with ogonek */
+  {"Euml",	0x00cb},  /* E with diaeresis */
+  {"Iacute",	0x00cd},  /* I with acute */
+  {"Icirc",	0x00ce},  /* I with circumflex */
+  {"Lacute",	0x0139},  /* L with acute */
+  {"Lcaron",	0x013d},  /* L with caron */
+  {"Lstrok",	0x0141},  /* L with stroke */
+  {"Nacute",	0x0143},  /* N with acute */
+  {"Ncaron",	0x0147},  /* N with caron */
+  {"Oacute",	0x00d3},  /* O with acute */
+  {"Ocirc",	0x00d4},  /* O with circumflex */
+  {"Odblac",	0x0150},  /* O with double acute */
+  {"Ouml",	0x00d6},  /* O with diaeresis */
+  {"Racute",	0x0154},  /* R with acute */
+  {"Rcaron",	0x0158},  /* R with caron */
+  {"Sacute",	0x015a},  /* S with acute */
+  {"Scaron",	0x0160},  /* S with caron */
+  {"Scedil",	0x015e},  /* S with cedilla */
+  {"Tcaron",	0x0164},  /* T with caron */
+  {"Tcedil",	0x0162},  /* T with cedilla */
+  {"Uacute",	0x00da},  /* U with acute */
+  {"Udblac",	0x0170},  /* U with double acute */
+  {"Uring",	0x016e},  /* U with ring above */
+  {"Uuml",	0x00dc},  /* U with diaeresis */
+  {"Yacute",	0x00dd},  /* Y with acute */
+  {"Zacute",	0x0179},  /* Z with acute */
+  {"Zcaron",	0x017d},  /* Z with caron */
+  {"Zdot",	0x017b},  /* Z with dot above */
+  {"aacute",	0x00e1},  /* a with acute */
+  {"abreve",	0x0103},  /* a with breve */
+  {"acirc",	0x00e2},  /* a with circumflex */
+  {"acute",	0x00b4},  /* acuteaccent */
+  {"amp",	0x0026},  /* ampersand */
+  {"aogon",	0x0105},  /* a with ogonek */
+  {"apos",	0x0027},  /* apostrophe */
+  {"ast",	0x002a},  /* asterisk */
+  {"auml",	0x00e4},  /* a with diaeresis */
+  {"breve",	0x02d8},  /* breve */
+  {"bsol",	0x005c},  /* reversesolidus */
+  {"cacute",	0x0107},  /* c with acute */
+  {"caron",	0x02c7},  /* caron */
+  {"ccaron",	0x010d},  /* c with caron */
+  {"ccedil",	0x00e7},  /* c with cedilla */
+  {"cedil",	0x00b8},  /* cedilla */
+  {"circ",	0x005e},  /* circumflexaccent */
+  {"colon",	0x003a},  /* colon */
+  {"comma",	0x002c},  /* comma */
+  {"commat",	0x0040},  /* commercialat */
+  {"curren",	0x00a4},  /* currencysign */
+  {"dblac",	0x02dd},  /* doubleacuteaccent */
+  {"dcaron",	0x010f},  /* d with caron */
+  {"deg",	0x00b0},  /* degreesign */
+  {"divide",	0x00f7},  /* divisionsign */
+  {"dollar",	0x0024},  /* dollarsign */
+  {"dot",	0x02d9},  /* dotabove */
+  {"dstrok",	0x0111},  /* d with stroke */
+  {"eacute",	0x00e9},  /* e with acute */
+  {"ecaron",	0x011b},  /* e with caron */
+  {"eogon",	0x0119},  /* e with ogonek */
+  {"equals",	0x003d},  /* equalssign */
+  {"euml",	0x00eb},  /* e with diaeresis */
+  {"excl",	0x0021},  /* exclamationmark */
+  {"grave",	0x0060},  /* graveaccent */
+  {"gt",	0x003e},  /* greater-thansign */
+  {"hyphen",	0x002d},  /* hyphen-minus */
+  {"iacute",	0x00ed},  /* i with acute */
+  {"icirc",	0x00ee},  /* i with circumflex */
+  {"lacute",	0x013a},  /* l with acute */
+  {"lcaron",	0x013e},  /* l with caron */
+  {"lcub",	0x007b},  /* leftcurlybracket */
+  {"lowbar",	0x005f},  /* lowline */
+  {"lpar",	0x0028},  /* leftparenthesis */
+  {"lrm",	 8206},	/* left-to-right mark */ 
+  {"lsqb",	0x005b},  /* leftsquarebracket */
+  {"lstrok",	0x0142},  /* l with stroke */
+  {"lt",	0x003c},  /* less-thansign */
+  {"nacute",	0x0144},  /* n with acute */
+  {"nbsp",	0x00a0},  /* no-breakspace */
+  {"ncaron",	0x0148},  /* n with caron */
+  {"num",	0x0023},  /* numbersign */
+  {"oacute",	0x00f3},  /* o with acute */
+  {"ocirc",	0x00f4},  /* o with circumflex */
+  {"odblac",	0x0151},  /* o with double acute */
+  {"ogon",	0x02db},  /* ogonek */
+  {"ouml",	0x00f6},  /* o with diaeresis */
+  {"percnt",	0x0025},  /* percentsign */
+  {"period",	0x002e},  /* fullstop */
+  {"plus",	0x002b},  /* plussign */
+  {"quest",	0x003f},  /* questionmark */
+  {"quot",	0x0022},  /* quotationmark */
+  {"racute",	0x0155},  /* r with acute */
+  {"rcaron",	0x0159},  /* r with caron */
+  {"rcub",	0x007d},  /* rightcurlybracket */
+  {"rlm",	 8207},	/* right-to-left mark */ 
+  {"rpar",	0x0029},  /* rightparenthesis */
+  {"rsqb",	0x005d},  /* rightsquarebracket */
+  {"sacute",	0x015b},  /* s with acute */
+  {"scaron",	0x0161},  /* s with caron */
+  {"scedil",	0x015f},  /* s with cedilla */
+  {"sect",	0x00a7},  /* sectionsign */
+  {"semi",	0x003b},  /* semicolon */
+  {"shy",	0x00ad},  /* softhyphen */
+  {"sol",	0x002f},  /* solidus */
+  {"tcaron",	0x0165},  /* t with caron */
+  {"tcedil",	0x0163},  /* t with cedilla */
+  {"tilde",	0x007e},  /* tilde */
+  {"times",	0x00d7},  /* multiplicationsign */
+  {"uacute",	0x00fa},  /* u with acute */
+  {"udblac",	0x0171},  /* u with double acute */
+  {"uml",	0x00a8},  /* diaeresis */
+  {"uring",	0x016f},  /* u with ring above */
+  {"uuml",	0x00fc},  /* u with diaeresis */
+  {"verbar",	0x007c},  /* verticalline */
+  {"yacute",	0x00fd},  /* y with acute */
+  {"zacute",	0x017a},  /* z with acute */
+  {"zcaron",	0x017e},  /* z with caron */
+  {"zdot",	0x017c},  /* z with dot above */
+  {"zwj",	 8205},	/* zero width joiner */ 
+  {"zwnj",	 8204},	/* zero width non-joiner */ 
 
 };
 #endif /* EXP_CHARTRANS */
@@ -608,6 +607,7 @@ static attr font_attr[] = {			/* FONT attributes */
 };
 
 static attr form_attr[] = {			/* FORM attributes */
+	{ "ACCEPT-CHARSET"},	/* HTML 4.0 draft - kw */
 	{ "ACTION"},
 	{ "CLASS" },
 	{ "CLEAR" },
@@ -738,6 +738,7 @@ static attr img_attr[] = {			/* IMG attributes */
 
 static attr input_attr[] = {			/* INPUT attributes */
 	{ "ACCEPT" },
+	{ "ACCEPT-CHARSET" },	/* RFC 2070 HTML i18n - kw */
 	{ "ALIGN" },
 	{ "ALT" },
 	{ "CHECKED" },
@@ -822,6 +823,7 @@ static attr legend_attr[] = {			/* LEGEND attributes */
 };
 
 static attr link_attr[] = {			/* LINK attributes */
+	{ "CHARSET" },		/* RFC 2070 HTML i18n -- hint for UA -- - kw */
 	{ "CLASS" },
 	{ "HREF" },
 	{ "ID" },
@@ -1126,6 +1128,7 @@ static attr td_attr[] = {			/* TD and TH attributes */
 };
 
 static attr textarea_attr[] = {			/* TEXTAREA attributes */
+	{ "ACCEPT-CHARSET" },	/* RFC 2070 HTML i18n - kw */
 	{ "ALIGN" },
 	{ "CLASS" },
 	{ "CLEAR" },
@@ -1471,6 +1474,8 @@ static attr ulist_attr[] = {			/* UL attributes */
  /* { "XMP"	, gen_attr,	HTML_GEN_ATTRIBUTES,	SGML_LITTERAL }, */
 #define T_XMP		0x0800, 0x00000,0x00000,0x367E0,0x36FFF,0x0875F,0x00001
 
+#define T__UNREC_	0x0000, 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000
+
 /*	Elements
 **	--------
 **
@@ -1762,6 +1767,9 @@ PUBLIC void HTSwitchDTD ARGS1(
 	memcpy(tags, tags_old, HTML_ELEMENTS * sizeof(HTTag));
 }
 
+PUBLIC CONST HTTag HTTag_unrecognized =
+    { NULL,    NULL,		0,	SGML_EMPTY,T__UNREC_};
+
 /*
 **	Utility Routine:  Useful for people building HTML objects.
 */
@@ -1799,7 +1807,7 @@ PUBLIC void HTStartAnchor ARGS3(
         value[HTML_A_HREF] = (CONST char *)href;
     }
 
-    (*obj->isa->start_element)(obj, HTML_A, present, value, 0);
+    (*obj->isa->start_element)(obj, HTML_A, present, value, -1, 0);
 }
 
 PUBLIC void HTStartIsIndex ARGS3(
@@ -1823,5 +1831,5 @@ PUBLIC void HTStartIsIndex ARGS3(
         value[HTML_ISINDEX_HREF] = (CONST char *)href;
     }
 
-    (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, 0);
+    (*obj->isa->start_element)(obj, HTML_ISINDEX , present, value, -1, 0);
 }
diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h
index 98816060..1e5646fd 100644
--- a/WWW/Library/Implementation/HTMLDTD.h
+++ b/WWW/Library/Implementation/HTMLDTD.h
@@ -178,7 +178,7 @@ Attribute numbers
    
  */
 #define HTML_A_ACCESSKEY        0
-#define HTML_A_CHARSET          1 /* i18n draft, added tentatively - kw */
+#define HTML_A_CHARSET          1 /* RFC 2070 HTML i18n - kw */
 #define HTML_A_CLASS            2
 #define HTML_A_CLEAR            3
 #define HTML_A_COORDS           4
@@ -455,21 +455,22 @@ Attribute numbers
 #define HTML_FONT_STYLE         9
 #define HTML_FONT_ATTRIBUTES   10
 
-#define HTML_FORM_ACTION        0
-#define HTML_FORM_CLASS         1
-#define HTML_FORM_CLEAR         2
-#define HTML_FORM_DIR           3
-#define HTML_FORM_ENCTYPE       4
-#define HTML_FORM_ID            5
-#define HTML_FORM_LANG          6
-#define HTML_FORM_METHOD        7
-#define HTML_FORM_ONSUBMIT      8
-#define HTML_FORM_SCRIPT        9
-#define HTML_FORM_STYLE        10
-#define HTML_FORM_SUBJECT      11
-#define	HTML_FORM_TARGET       12
-#define HTML_FORM_TITLE        13
-#define HTML_FORM_ATTRIBUTES   14
+#define HTML_FORM_ACCEPT_CHARSET  0 /* HTML 4.0 draft - kw */
+#define HTML_FORM_ACTION        1
+#define HTML_FORM_CLASS         2
+#define HTML_FORM_CLEAR         3
+#define HTML_FORM_DIR           4
+#define HTML_FORM_ENCTYPE       5
+#define HTML_FORM_ID            6
+#define HTML_FORM_LANG          7
+#define HTML_FORM_METHOD        8
+#define HTML_FORM_ONSUBMIT      9
+#define HTML_FORM_SCRIPT       10
+#define HTML_FORM_STYLE        11
+#define HTML_FORM_SUBJECT      12
+#define	HTML_FORM_TARGET       13
+#define HTML_FORM_TITLE        14
+#define HTML_FORM_ATTRIBUTES   15
 
 #define HTML_FRAME_ID           0
 #define HTML_FRAME_MARGINHEIGHT 1
@@ -557,37 +558,38 @@ Attribute numbers
 #define HTML_IMG_ATTRIBUTES    18
 
 #define HTML_INPUT_ACCEPT       0
-#define HTML_INPUT_ALIGN        1
-#define HTML_INPUT_ALT          2
-#define HTML_INPUT_CHECKED      3
-#define HTML_INPUT_CLASS        4
-#define HTML_INPUT_CLEAR        5
-#define HTML_INPUT_DIR          6
-#define HTML_INPUT_DISABLED     7
-#define HTML_INPUT_ERROR        8
-#define HTML_INPUT_HEIGHT       9
-#define HTML_INPUT_ID          10
-#define HTML_INPUT_LANG        11
-#define HTML_INPUT_MAX         12
-#define HTML_INPUT_MAXLENGTH   13
-#define HTML_INPUT_MD          14
-#define HTML_INPUT_MIN         15
-#define HTML_INPUT_NAME        16
-#define HTML_INPUT_NOTAB       17
-#define HTML_INPUT_ONBLUR      18
-#define HTML_INPUT_ONCHANGE    19
-#define HTML_INPUT_ONCLICK     20
-#define HTML_INPUT_ONFOCUS     21
-#define HTML_INPUT_ONSELECT    22
-#define HTML_INPUT_SIZE        23
-#define HTML_INPUT_SRC         24
-#define HTML_INPUT_STYLE       25
-#define HTML_INPUT_TABINDEX    26
-#define HTML_INPUT_TITLE       27
-#define HTML_INPUT_TYPE        28
-#define HTML_INPUT_VALUE       29
-#define HTML_INPUT_WIDTH       30
-#define HTML_INPUT_ATTRIBUTES  31
+#define HTML_INPUT_ACCEPT_CHARSET  1 /* RFC 2070 HTML i18n - kw */
+#define HTML_INPUT_ALIGN        2
+#define HTML_INPUT_ALT          3
+#define HTML_INPUT_CHECKED      4
+#define HTML_INPUT_CLASS        5
+#define HTML_INPUT_CLEAR        6
+#define HTML_INPUT_DIR          7
+#define HTML_INPUT_DISABLED     8
+#define HTML_INPUT_ERROR        9
+#define HTML_INPUT_HEIGHT      10
+#define HTML_INPUT_ID          11
+#define HTML_INPUT_LANG        12
+#define HTML_INPUT_MAX         13
+#define HTML_INPUT_MAXLENGTH   14
+#define HTML_INPUT_MD          15
+#define HTML_INPUT_MIN         16
+#define HTML_INPUT_NAME        17
+#define HTML_INPUT_NOTAB       18
+#define HTML_INPUT_ONBLUR      19
+#define HTML_INPUT_ONCHANGE    20
+#define HTML_INPUT_ONCLICK     21
+#define HTML_INPUT_ONFOCUS     22
+#define HTML_INPUT_ONSELECT    23
+#define HTML_INPUT_SIZE        24
+#define HTML_INPUT_SRC         25
+#define HTML_INPUT_STYLE       26
+#define HTML_INPUT_TABINDEX    27
+#define HTML_INPUT_TITLE       28
+#define HTML_INPUT_TYPE        29
+#define HTML_INPUT_VALUE       30
+#define HTML_INPUT_WIDTH       31
+#define HTML_INPUT_ATTRIBUTES  32
 
 #define HTML_ISINDEX_ACTION     0  /* Treat as synonym for HREF. - FM */
 #define HTML_ISINDEX_DIR        1
@@ -646,17 +648,18 @@ Attribute numbers
 #define HTML_LI_VALUE          12
 #define HTML_LI_ATTRIBUTES     13
 
-#define HTML_LINK_CLASS         0
-#define HTML_LINK_HREF          1
-#define HTML_LINK_ID            2
-#define HTML_LINK_MEDIA         3
-#define HTML_LINK_REL           4
-#define HTML_LINK_REV           5
-#define HTML_LINK_STYLE         6
-#define	HTML_LINK_TARGET        7
-#define HTML_LINK_TITLE         8
-#define HTML_LINK_TYPE          9
-#define HTML_LINK_ATTRIBUTES   10
+#define HTML_LINK_CHARSET       0 /* RFC 2070 HTML i18n - kw */
+#define HTML_LINK_CLASS         1
+#define HTML_LINK_HREF          2
+#define HTML_LINK_ID            3
+#define HTML_LINK_MEDIA         4
+#define HTML_LINK_REL           5
+#define HTML_LINK_REV           6
+#define HTML_LINK_STYLE         7
+#define	HTML_LINK_TARGET        8
+#define HTML_LINK_TITLE         9
+#define HTML_LINK_TYPE         10
+#define HTML_LINK_ATTRIBUTES   11
 
 #define HTML_MAP_CLASS          0
 #define HTML_MAP_CLEAR          1
@@ -897,26 +900,27 @@ Attribute numbers
 #define HTML_TD_VALIGN         16
 #define HTML_TD_ATTRIBUTES     17
 
-#define HTML_TEXTAREA_ALIGN     0
-#define HTML_TEXTAREA_CLASS     1
-#define HTML_TEXTAREA_CLEAR     2
-#define HTML_TEXTAREA_COLS      3
-#define HTML_TEXTAREA_DIR       4
-#define HTML_TEXTAREA_DISABLED  5
-#define HTML_TEXTAREA_ERROR     6
-#define HTML_TEXTAREA_ID        7
-#define HTML_TEXTAREA_LANG      8
-#define HTML_TEXTAREA_NAME      9
-#define HTML_TEXTAREA_NOTAB    10
-#define HTML_TEXTAREA_ONBLUR   11
-#define HTML_TEXTAREA_ONCHANGE 12
-#define HTML_TEXTAREA_ONFOCUS  13
-#define HTML_TEXTAREA_ONSELECT 14
-#define HTML_TEXTAREA_ROWS     15
-#define HTML_TEXTAREA_STYLE    16
-#define HTML_TEXTAREA_TABINDEX 17
-#define HTML_TEXTAREA_TITLE    18
-#define HTML_TEXTAREA_ATTRIBUTES 19
+#define HTML_TEXTAREA_ACCEPT_CHARSET  0 /* RFC 2070 HTML i18n - kw */
+#define HTML_TEXTAREA_ALIGN     1
+#define HTML_TEXTAREA_CLASS     2
+#define HTML_TEXTAREA_CLEAR     3
+#define HTML_TEXTAREA_COLS      4
+#define HTML_TEXTAREA_DIR       5
+#define HTML_TEXTAREA_DISABLED  6
+#define HTML_TEXTAREA_ERROR     7
+#define HTML_TEXTAREA_ID        8
+#define HTML_TEXTAREA_LANG      9
+#define HTML_TEXTAREA_NAME     10
+#define HTML_TEXTAREA_NOTAB    11
+#define HTML_TEXTAREA_ONBLUR   12
+#define HTML_TEXTAREA_ONCHANGE 13
+#define HTML_TEXTAREA_ONFOCUS  14
+#define HTML_TEXTAREA_ONSELECT 15
+#define HTML_TEXTAREA_ROWS     16
+#define HTML_TEXTAREA_STYLE    17
+#define HTML_TEXTAREA_TABINDEX 18
+#define HTML_TEXTAREA_TITLE    19
+#define HTML_TEXTAREA_ATTRIBUTES 20
 
 #define HTML_TR_ALIGN           0
 #define HTML_TR_CHAR            1
@@ -954,6 +958,8 @@ extern CONST SGML_dtd HTML_dtd;
 extern void HTSwitchDTD PARAMS((
     BOOL new));
 
+extern CONST HTTag HTTag_unrecognized;
+
 /*
 
 Start anchor element
diff --git a/WWW/Library/Implementation/HTMLGen.c b/WWW/Library/Implementation/HTMLGen.c
index e3ca3e9d..ef5c2faa 100644
--- a/WWW/Library/Implementation/HTMLGen.c
+++ b/WWW/Library/Implementation/HTMLGen.c
@@ -66,7 +66,8 @@ struct _HTStructured {
 **	------------
 */
 
-PRIVATE void flush_breaks (HTStructured * me)
+PRIVATE void flush_breaks ARGS1(
+	HTStructured *,		me)
 {
     int i;
     for (i=0; i<= MAX_CLEANNESS; i++) {
@@ -91,7 +92,10 @@ PRIVATE void HTMLGen_flush ARGS1(
 **	We keep track of all the breaks for when we chop the line
 */
 
-PRIVATE void allow_break (HTStructured * me, int new_cleanness, BOOL dlbc)
+PRIVATE void allow_break ARGS3(
+	HTStructured *,	me,
+	int,		new_cleanness,
+	BOOL,		dlbc)
 {
     if (dlbc && me->write_pointer == me->buffer) dlbc = NO;
     me->line_break[new_cleanness] = 
@@ -254,11 +258,12 @@ PRIVATE void HTMLGen_write ARGS3(
 **	Within the opening tag, there may be spaces
 **	and the line may be broken at these spaces.
 */
-PRIVATE void HTMLGen_start_element ARGS5(
+PRIVATE void HTMLGen_start_element ARGS6(
 	HTStructured *, 	me,
 	int,			element_number,
 	CONST BOOL*,	 	present,
 	CONST char **,		value,
+	int,			charset,
 	char **,		insert)
 {
     int i;
@@ -433,6 +438,7 @@ PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */
 */
 extern int LYcols;			/* LYCurses.h, set in LYMain.c	*/
 extern BOOL dump_output_immediately;	/* TRUE if no interactive user 	*/
+extern int dump_output_width;	        /* -width instead of 80		*/
 extern BOOLEAN LYPreparsedSource;	/* Show source as preparsed?	*/
 
 PUBLIC HTStructured * HTMLGenerator ARGS1(
@@ -460,17 +466,19 @@ PUBLIC HTStructured * HTMLGenerator ARGS1(
      */
     if (!LYPreparsedSource) {
 	me->buffer_maxchars = 80; /* work as before - kw */
+    } else if (dump_output_width > 1) {
+	me->buffer_maxchars = dump_output_width; /* try to honor -width - kw */
     } else if (dump_output_immediately) {
-	me->buffer_maxchars = 80; /* work as before - kw */
+	me->buffer_maxchars = 80; /* try to honor -width - kw */
     } else {
 	me->buffer_maxchars = LYcols - 2;
 	if (me->buffer_maxchars < 38) /* too narrow, let GridText deal */
 	    me->buffer_maxchars = 40;
-	if (me->buffer_maxchars > 900) /* likely not true - kw */
-	    me->buffer_maxchars = 78;
-	if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */
-	    me->buffer_maxchars = BUFFER_SIZE - 2;
     }
+    if (me->buffer_maxchars > 900) /* likely not true - kw */
+	me->buffer_maxchars = 78;
+    if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */
+	me->buffer_maxchars = BUFFER_SIZE - 2;
 
     /*
      *	If dump_output_immediately is set, there likely isn't anything
@@ -513,10 +521,10 @@ PUBLIC HTStream* HTPlainToHTML ARGS3(
 	HTParentAnchor *,	anchor,	
 	HTStream *,		sink)
 {
-    HTStructured* me = (HTStructured*)malloc(sizeof(*me));
+    HTStructured *me = (HTStructured *)malloc(sizeof(*me));
     if (me == NULL)
         outofmem(__FILE__, "PlainToHTML");
-    me->isa = (CONST HTStructuredClass*) &PlainToHTMLConversion;       
+    me->isa = (CONST HTStructuredClass *)&PlainToHTMLConversion;       
 
     /*
      *  Copy pointers to routines for speed.
@@ -528,8 +536,10 @@ PUBLIC HTStream* HTPlainToHTML ARGS3(
     me->cleanness = 	0;
     me->overflowed = NO;
     me->delete_line_break_char[0] = NO;
-    me->buffer_maxchars = 80;
-	
+    /* try to honor -width - kw */
+    me->buffer_maxchars = (dump_output_width > 1 ?
+			   dump_output_width : 80);
+
     HTMLGen_put_string(me, "<HTML>\n<BODY>\n<PRE>\n");
     me->preformatted = YES;
     me->escape_specials = NO;
diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c
index bcf2993e..bfd64e5a 100644
--- a/WWW/Library/Implementation/HTNews.c
+++ b/WWW/Library/Implementation/HTNews.c
@@ -82,7 +82,7 @@ PRIVATE HTList *NNTP_AuthInfo = NULL;		/* AUTHINFO database */
 
 #define PUTC(c) (*targetClass.put_character)(target, c)
 #define PUTS(s) (*targetClass.put_string)(target, s)
-#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0)
+#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*targetClass.end_element)(target, e, 0)
 #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
                         (*targetClass.end_element)(target, e, 0)
@@ -639,7 +639,7 @@ PRIVATE void start_anchor ARGS1(CONST char *,  href)
     }
     ((CONST char **)value)[HTML_A_HREF] = href;
     (*targetClass.start_element)(target, HTML_A , present,
-    				 (CONST char **)value, 0);
+    				 (CONST char **)value, -1, 0);
 }
 
 /*      Start link element
@@ -658,7 +658,7 @@ PRIVATE void start_link ARGS2(CONST char *,  href, CONST char *, rev)
     ((CONST char **)value)[HTML_LINK_HREF] = href;
     ((CONST char **)value)[HTML_LINK_REV]  = rev;
     (*targetClass.start_element)(target, HTML_LINK, present,
-				 (CONST char **)value, 0);
+				 (CONST char **)value, -1, 0);
 }
 
 /*      Start list element
@@ -677,7 +677,7 @@ PRIVATE void start_list ARGS1(int, seqnum)
     ((CONST char **)value)[HTML_OL_SEQNUM] = SeqNum;
     ((CONST char **)value)[HTML_OL_START]  = SeqNum;
     (*targetClass.start_element)(target, HTML_OL , present,
-				 (CONST char **)value, 0);
+				 (CONST char **)value, -1, 0);
 }
 
 /*	Paste in an Anchor
@@ -1010,6 +1010,9 @@ PRIVATE int read_article NOARGS
 		        HTmmdecode(subject, subject);
 			HTrjis(subject, subject);
 		    }
+#ifdef NOTUSED_CHARTRANS
+		    else HTmmdecode(subject, subject);
+#endif
 
 		} else if (match(full_line, "DATE:")) {
 		    StrAllocCopy(date, HTStrip(strchr(full_line,':')+1));
@@ -1021,6 +1024,9 @@ PRIVATE int read_article NOARGS
 		        HTmmdecode(organization, organization);
 			HTrjis(organization, organization);
 		    }
+#ifdef NOTUSED_CHARTRANS
+		    else HTmmdecode(organization, organization);
+#endif
 
 		} else if (match(full_line, "FROM:")) {
 		    StrAllocCopy(from, HTStrip(strchr(full_line,':')+1));
@@ -1028,6 +1034,9 @@ PRIVATE int read_article NOARGS
 		        HTmmdecode(from, from);
 			HTrjis(from, from);
 		    }
+#ifdef NOTUSED_CHARTRANS
+		    else HTmmdecode(from, from);
+#endif
 
 		} else if (match(full_line, "REPLY-TO:")) {
 		    StrAllocCopy(replyto, HTStrip(strchr(full_line,':')+1));
@@ -1035,6 +1044,9 @@ PRIVATE int read_article NOARGS
 		        HTmmdecode(replyto, replyto);
 			HTrjis(replyto, replyto);
 		    }
+#ifdef NOTUSED_CHARTRANS
+		    else HTmmdecode(replyto, replyto);
+#endif
 
 		} else if (match(full_line, "NEWSGROUPS:")) {
 		    StrAllocCopy(newsgroups, HTStrip(strchr(full_line,':')+1));
@@ -1750,6 +1762,11 @@ PRIVATE int read_group ARGS3(
 			case 's':
 			    if (match(line, "SUBJECT:")) {
 				strcpy(subject, line+9);/* Save subject */
+#ifdef NOTUSED_CHARTRANS
+				HTmmdecode(subject, subject);
+				if (HTCJK == JAPANESE)
+				    HTrjis(subject, subject);
+#endif
 			 	if (HTCJK == JAPANESE) {
 				    HTmmdecode(subject, subject);
 				    HTrjis(subject, subject);
@@ -1772,6 +1789,11 @@ PRIVATE int read_group ARGS3(
 				char * p;
 				strcpy(author,
 					author_name(strchr(line,':')+1));
+#ifdef NOTUSED_CHARTRANS
+				HTmmdecode(author, author);
+				if (HTCJK == JAPANESE)
+				    HTrjis(author, author);
+#endif
 				if (HTCJK == JAPANESE) {
 				    HTmmdecode(author, author);
 				    HTrjis(author, author);
diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c
index 350107c2..e69b77ad 100644
--- a/WWW/Library/Implementation/HTParse.c
+++ b/WWW/Library/Implementation/HTParse.c
@@ -594,7 +594,7 @@ PRIVATE CONST unsigned char isAcceptable[96] =
 	 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 4x  @ABCDEFGHIJKLMNO  */
 	 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,	/* 5X  PQRSTUVWXYZ[\]^_	 */
 	 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 6x  `abcdefghijklmno	 */
-	 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };	/* 7X  pqrstuvwxyz{\}~	DEL */
+	 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };	/* 7X  pqrstuvwxyz{|}~	DEL */
 
 PRIVATE char *hex = "0123456789ABCDEF";
 #define ACCEPTABLE(a)	( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
@@ -749,3 +749,63 @@ PUBLIC char * HTUnEscapeSome ARGS2(
     return str;
     
 } /* HTUnEscapeSome */
+
+PRIVATE CONST unsigned char crfc[96] =
+
+/*	Indexed by (char - 32).  Bit 0: char forces the word to be "quoted"
+**	Bit 1: char needs a \escape once inside quotes (see HTMake822Word)
+*/
+    /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
+    {    1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0,	/* 2x   !"#$%&'()*+,-./	 */
+         0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,	/* 3x  0123456789:;<=>?	 */
+	 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 4x  @ABCDEFGHIJKLMNO  */
+	 0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,	/* 5X  PQRSTUVWXYZ[\]^_	 */
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 6x  `abcdefghijklmno	 */
+	 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 };	/* 7X  pqrstuvwxyz{|}~	DEL */
+
+/* Replace *str by an RFC 822 quoted-string ("" if NULL or empty) unless it
+** is already a plain word, i.e. has only chars 32..127 without crfc bit 0. */
+PUBLIC void HTMake822Word ARGS1(
+	char **,	str)
+{
+    CONST char * p;
+    char * q;
+    char * result;
+    unsigned char a;
+    int quote = 0;		/* set when some character forces quoting */
+    int added = 2;		/* bytes beyond the input: 2 quote marks... */
+    if (!(*str) || !(**str)) {
+	StrAllocCopy(*str, "\"\"");
+	return;
+    }
+    for (p = *str; *p; p++) {
+	a = *p;
+	if (a < 32 || a >= 128 || ((crfc[a-32]) & 1))
+	    quote = 1;
+	if (a >= 160 || a == '\t')
+	    continue;		/* copied through without an escape */
+	if (a == '\r' || a == '\n')
+	    added += 2;		/* ...an escape and a following space... */
+	else if ((a & 127) < 32 || ((crfc[a-32]) & 2))
+	    added++;		/* ...or an escape, also for a bare '\' */
+    }
+    if (!quote)
+	return;
+    result = (char *) malloc(p-(*str) + added + 1);
+    if (result == NULL)
+	outofmem(__FILE__, "HTMake822Word");
+    result[0] = '"';
+    for (q = result + 1, p = *str; *p; p++) {
+	a = TOASCII(*p);
+	if ((a != '\t') && ((a & 127) < 32 ||
+			    ( a < 128 && ((crfc[a-32]) & 2))))
+	    *q++ = '\\';	/* quoted-pair uses a backslash, not ' */
+	*q++ = *p;
+	if (a == '\n' || (a == '\r' && (TOASCII(*(p+1)) != '\n')))
+	    *q++ = ' ';
+    }
+    *q++ = '"';
+    *q++ = '\0';			/* Terminate */
+    FREE(*str);
+    *str = result;
+}
diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h
index 2e01c271..63c84739 100644
--- a/WWW/Library/Implementation/HTParse.h
+++ b/WWW/Library/Implementation/HTParse.h
@@ -154,6 +154,10 @@ extern char * HTUnEscapeSome PARAMS((
 	char *		str,
 	CONST char *	do_trans));
 
+/* Turn a string which is not an RFC 822 token into a quoted-string - kw */
+extern void HTMake822Word PARAMS((
+	char **	str));
+
 #endif  /* HTPARSE_H */
 
 /*
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c
index a76616e5..c95f55a8 100644
--- a/WWW/Library/Implementation/HTPlain.c
+++ b/WWW/Library/Implementation/HTPlain.c
@@ -34,6 +34,7 @@ extern CONST char * LYchar_set_names[];
 extern CONST char **LYCharSets[];
 #ifdef EXP_CHARTRANS
 extern int LYlowest_eightbit[];
+extern BOOLEAN LYRawMode;
 #endif /* EXP_CHARTRANS */
 extern CONST char * HTMLGetEntityName PARAMS((int i));
 extern BOOL HTPassEightBitRaw;
@@ -361,16 +362,6 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	*/
 	} else if (unsign_c == 173) {
 	    continue;
-#ifdef EXP_CHARTRANS
-	} else if (me->T.strip_raw_char_in &&
-		   (unsigned char)*p >= 0xc0 &&
-		   (unsigned char)*p < 255) {
-	    /*
-	    **  KOI special: strip high bit, gives
-	    **  (somewhat) readable ASCII.
-	    */
-	    HText_appendCharacter(me->text, (char)(*p & 0x7f));
-#endif /* EXP_CHARTRANS */
 	/*
 	**  If we get to here, pass the displayable ASCII characters. - FM
 	*/
@@ -419,9 +410,22 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	    HText_appendText(me->text, me->utf_buf);
 	    me->utf_buf_p = me->utf_buf;
 	    *(me->utf_buf_p) = '\0';
+	} else if (me->T.strip_raw_char_in &&
+		   (unsigned char)*p >= 0xc0 &&
+		   (unsigned char)*p < 255) {
+	    /*
+	    **  KOI special: strip high bit, gives
+	    **  (somewhat) readable ASCII.
+	    */
+	    HText_appendCharacter(me->text, (char)(*p & 0x7f));
 	} else if (me->T.trans_from_uni && unsign_c > 255) {
-	    sprintf(replace_buf, "U%.2lx", unsign_c);
-	    HText_appendText(me->text, replace_buf);
+	    if (PASSHI8BIT && PASSHICTRL && LYRawMode &&
+		(unsigned char)*p >= LYlowest_eightbit[me->htext_char_set]) {
+		HText_appendCharacter(me->text, *p);
+	    } else {
+		sprintf(replace_buf, "U%.2lx", unsign_c);
+		HText_appendText(me->text, replace_buf);
+	    }
 #endif /* EXP_CHARTRANS */
 
 	/*
@@ -493,6 +497,12 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    me->utf_buf_p = me->utf_buf;
 		    *(me->utf_buf_p) = '\0';
 
+		} else if (LYRawMode &&
+			   me->in_char_set != me->htext_char_set &&
+			   (PASSHI8BIT || PASSHICTRL) &&
+			   (unsigned char)c_p >=
+			             LYlowest_eightbit[me->htext_char_set]) {
+		    HText_appendCharacter(me->text, c_p);
 		} else if (me->T.trans_from_uni && unsign_c >= 127) {
 		    sprintf(replace_buf,"U%.2lx",unsign_c);
 		    HText_appendText(me->text, replace_buf);
diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c
index 9c08a170..f4009328 100644
--- a/WWW/Library/Implementation/HTTP.c
+++ b/WWW/Library/Implementation/HTTP.c
@@ -317,9 +317,9 @@ try_again:
 	  for (i = 0; line[i]; i++)
 	      line[i] = TOLOWER(line[i]);
 	  if (strstr(line, "iso-8859-1") == NULL)
-	      strcat(line, ", iso-8859-1;q=0.001");
+	      strcat(line, ", iso-8859-1;q=0.01");
 	  if (strstr(line, "us-ascii") == NULL)
-	      strcat(line, ", us-ascii;q=0.001");
+	      strcat(line, ", us-ascii;q=0.01");
 	  StrAllocCat(command, line);
 	  sprintf(line, "%c%c", CR, LF);
 	  StrAllocCat(command, line);
@@ -830,7 +830,7 @@ try_again:
         if (TRACE)
             fprintf (stderr, "--- Talking HTTP0.\n");
 
-        format_in = HTFileFormat(url, &encoding);
+        format_in = HTFileFormat(url, &encoding, NULL);
 	/*
 	**  Treat all plain text as HTML.
         **  This sucks but its the only solution without
@@ -842,6 +842,23 @@ try_again:
                            "HTTP: format_in being changed to text/HTML\n");
             format_in = WWW_HTML;
         }
+	if (!IsUnityEnc(encoding)) {
+	    /*
+	    **  Change the format to that for "www/compressed".
+	    */
+	    if (TRACE) {
+		fprintf(stderr,
+			"HTTP: format_in is '%s',\n", HTAtom_name(format_in));
+	    }
+	    StrAllocCopy(anAnchor->content_type, HTAtom_name(format_in));
+	    StrAllocCopy(anAnchor->content_encoding, HTAtom_name(encoding));
+	    format_in = HTAtom_for("www/compressed");
+	    if (TRACE) {
+		fprintf(stderr,
+			"        Treating as '%s' with encoding '%s'\n",
+			"www/compressed", HTAtom_name(encoding));
+	    }
+	}
 
         start_of_data = line_kept_clean;
     } else {
@@ -893,6 +910,14 @@ try_again:
 	    /*
 	    **  Good: Got MIME object! (Successful) - FM
 	    */
+	    if (do_head) {
+	        /*
+		 *  If HEAD was requested, show headers (and possibly
+		 *  bogus body) for all 2xx status codes as text/plain - KW
+		 */
+		HTProgress(line_buffer);
+	        break;
+	    }
 	    switch (server_status) {
 	      case 204:
 	        /*
diff --git a/WWW/Library/Implementation/HTUtils.h b/WWW/Library/Implementation/HTUtils.h
index a0c93655..ec7265fb 100644
--- a/WWW/Library/Implementation/HTUtils.h
+++ b/WWW/Library/Implementation/HTUtils.h
@@ -21,6 +21,10 @@
 /* Explicit system-configure */
 #ifdef VMS
 #define NO_SIZECHANGE
+#define NO_UNISTD_H
+#define EXP_CHARTRANS
+#define NO_KEYPAD
+#define NO_UTMP
 #endif
 
 /* FIXME: these will be removed after completing auto-configure script */
diff --git a/WWW/Library/Implementation/HTVMSUtils.c b/WWW/Library/Implementation/HTVMSUtils.c
index 495c71ca..118f9517 100644
--- a/WWW/Library/Implementation/HTVMSUtils.c
+++ b/WWW/Library/Implementation/HTVMSUtils.c
@@ -711,7 +711,7 @@ long status;
 #include "HTML.h"
 #define PUTC(c) (*targetClass.put_character)(target, c)
 #define PUTS(s) (*targetClass.put_string)(target, s)
-#define START(e) (*targetClass.start_element)(target, e, 0, 0, 0)
+#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*targetClass.end_element)(target, e, 0)
 #define FREE_TARGET (*targetClass._free)(target)
 #define ABORT_TARGET (*targetClass._free)(target)
@@ -1098,15 +1098,18 @@ PUBLIC int HTVMSBrowseDir ARGS4(
 	    entry_info->display = TRUE;
 
 	    /* Get the type */
-	    format = HTFileFormat(dirbuf->d_name, &encoding);
-	    if(!strncmp(HTAtom_name(format), "application",11)) 
-	      {
-		   cp = HTAtom_name(format) + 12;
-		   if(!strncmp(cp,"x-", 2))
+	    format = HTFileFormat(dirbuf->d_name, &encoding,
+				  (CONST char **)&cp);
+	    if (!cp) {
+		if(!strncmp(HTAtom_name(format), "application",11)) 
+		{
+		    cp = HTAtom_name(format) + 12;
+		    if(!strncmp(cp,"x-", 2))
 			cp += 2;
-	      }
-	    else
-		cp = HTAtom_name(format);
+		}
+		else
+		    cp = HTAtom_name(format);
+	    }
 	    StrAllocCopy(entry_info->type, cp);
 
 	    StrAllocCopy(entry_info->filename, dirbuf->d_name);
diff --git a/WWW/Library/Implementation/HTWAIS.c b/WWW/Library/Implementation/HTWAIS.c
index 2bc4c4f6..bd293c2f 100644
--- a/WWW/Library/Implementation/HTWAIS.c
+++ b/WWW/Library/Implementation/HTWAIS.c
@@ -105,7 +105,7 @@ PRIVATE char	line[2048];	/* For building strings to display */
 
 #define PUTC(c) (*target->isa->put_character)(target, c)
 #define PUTS(s) (*target->isa->put_string)(target, s)
-#define START(e) (*target->isa->start_element)(target, e, 0, 0, 0)
+#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0)
 #define END(e) (*target->isa->end_element)(target, e, 0)
 #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
                         (*target->isa->end_element)(target, e, 0)
diff --git a/WWW/Library/Implementation/HTWSRC.c b/WWW/Library/Implementation/HTWSRC.c
index 3d3647b3..731ac5c7 100644
--- a/WWW/Library/Implementation/HTWSRC.c
+++ b/WWW/Library/Implementation/HTWSRC.c
@@ -37,7 +37,7 @@ struct _HTStructured {
 
 #define PUTC(c) (*me->target->isa->put_character)(me->target, c)
 #define PUTS(s) (*me->target->isa->put_string)(me->target, s)
-#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0, 0)
+#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0, -1, 0)
 #define END(e) (*me->target->isa->end_element)(me->target, e, 0)
 #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
                         (*me->target->isa->end_element)(me->target, e, 0)
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c
index e1f56166..51434a66 100644
--- a/WWW/Library/Implementation/SGML.c
+++ b/WWW/Library/Implementation/SGML.c
@@ -80,6 +80,7 @@ struct _HTStream {
     HTStructured		*target;	/* target object */
 
     HTTag 			*current_tag;
+    CONST HTTag 		*unknown_tag;
     int 			current_attribute_number;
     HTChunk			*string;
     HTElement			*element_stack;
@@ -112,7 +113,7 @@ struct _HTStream {
 
 #ifdef EXP_CHARTRANS
     HTParentAnchor *		node_anchor;
-    LYUCcharset	*		UCI;		/* anchor UCInfo	    */
+    LYUCcharset	*		UCI;		/* pointer to anchor UCInfo */
     int				in_char_set;	/* charset we are fed	    */
     LYUCcharset	*		htmlUCI;	/* anchor UCInfo for target */
     int				html_char_set;	/* feed it to target stream */
@@ -121,6 +122,7 @@ struct _HTStream {
     char 			utf_buf[7];
     char *			utf_buf_p;
     UCTransParams		T;
+    int			current_tag_charset; /* charset to pass attributes */
 #endif /* EXP_CHARTRANS */
 
     char *			recover;
@@ -159,6 +161,20 @@ PRIVATE void set_chartrans_handling ARGS3(
     UCSetTransParams(&context->T,
 		     context->in_char_set, context->UCI,
 		     context->html_char_set, context->htmlUCI);
+    if (HTCJK != NOCJK) {
+	context->current_tag_charset = -1;
+    } else if (context->T.transp) {
+	context->current_tag_charset = context->in_char_set;
+    } else if (context->T.decode_utf8) {
+	context->current_tag_charset = context->in_char_set;
+    } else if (context->T.do_8bitraw ||
+	       context->T.use_raw_char_in) {
+	context->current_tag_charset = context->in_char_set;
+    } else if (context->T.trans_from_uni || context->T.output_utf8) {
+	context->current_tag_charset = UCGetLYhndl_byMIME("unicode-1-1-utf-8");
+    } else {
+	context->current_tag_charset = 0;
+    }	
 }
 
 PRIVATE void change_chartrans_handling ARGS1(
@@ -222,6 +238,10 @@ PRIVATE void handle_attribute_name ARGS2(
     attr * attributes = tag->attributes;
 
     int high, low, i, diff;		/* Binary search for attribute name */
+    if (tag == context->unknown_tag) {
+	return;
+    }
+
     for (low = 0, high = tag->number_of_attributes;
     	 high > low;
 	 diff < 0 ? (low = i+1) : (high = i)) {
@@ -409,16 +429,16 @@ PRIVATE void handle_entity ARGS2(
 	    return;
 	    } else if ((rc == -4) &&
 		       /* Not found; look for replacement string */
-		     (rc = UCTransUniCharStr(replace_buf,60,
-					     extra_entities[i].code,
-					     current_char_set, 0)   >= 0 ) ) { 
-	    CONST char *p;
-	    for (p=replace_buf; *p; p++)
-	      PUTC(*p);
-	    FoundEntity = TRUE;
-	    return;
-	  } 
-	  rc = (*context->actions->put_entity)(context->target,
+		       (rc = UCTransUniCharStr(replace_buf, 60,
+					       extra_entities[i].code,
+					       current_char_set, 0) >= 0)) {
+		CONST char *p;
+		for (p = replace_buf; *p; p++)
+		    PUTC(*p);
+		FoundEntity = TRUE;
+		return;
+	    } 
+	    rc = (*context->actions->put_entity)(context->target,
 					  i+context->dtd->number_of_entities);
 	  if (rc != HT_CANNOT_TRANSLATE) {
 	      FoundEntity = TRUE;
@@ -580,7 +600,7 @@ extern BOOL New_DTD;
 typedef enum {
     close_NO	= 0,
     close_error = 1,
-    close_valid	= 2,
+    close_valid	= 2
 } canclose_t;
 
 PRIVATE canclose_t can_close ARGS2(
@@ -811,6 +831,7 @@ PRIVATE void start_element ARGS1(
 	new_tag - context->dtd->tags,
 	context->present,
 	(CONST char**) context->value,  /* coerce type for think c */
+	context->current_tag_charset,
 	(char **)&context->include);
     if (new_tag->contents != SGML_EMPTY) {		/* i.e. tag not empty */
 	HTElement * N = (HTElement *)malloc(sizeof(HTElement));
@@ -821,7 +842,10 @@ PRIVATE void start_element ARGS1(
 	context->element_stack = N;
     }
 #ifdef EXP_CHARTRANS
-    else {			/* check for result of META tag. */
+    else if (!strcasecomp(new_tag->name, "META")) {
+	/*
+	**  Check for result of META tag. - KW & FM
+	*/
 	change_chartrans_handling(context);
     }
 #endif /* EXP_CHARTRANS */
@@ -854,6 +878,10 @@ PUBLIC HTTag * SGMLFindTag ARGS2(
 	    return &dtd->tags[i];
 	}
     }
+    if (isalpha((unsigned char)string[0])) {
+	/* unrecognized, but may be valid - kw */
+	return (HTTag *)&HTTag_unrecognized;
+    }
     return NULL;
 }
 
@@ -977,7 +1005,7 @@ PUBLIC void SGML_character ARGS2(
     HTChunk	*string = 	context->string;
     CONST char * EntityName;
     extern int current_char_set;
-    extern CONST char *LYchar_set_names[];
+    extern CONST char * LYchar_set_names[];
     extern CONST char * HTMLGetEntityName PARAMS((int i));
 
 #ifdef EXP_CHARTRANS
@@ -1118,7 +1146,7 @@ PUBLIC void SGML_character ARGS2(
 	    c = replace_buf[0];
 	    if (c && replace_buf[1]) {
 		if (context->state == S_text) {
-		    for (p=replace_buf; *p; p++)
+		    for (p = replace_buf; *p; p++)
 			PUTC(*p);
 		    return;
 		}
@@ -1272,7 +1300,7 @@ top1:
 		   /*
 		   **  Not found; look for replacement string. - KW
 		   */
-		   (uck = UCTransUniCharStr(replace_buf,60, clong,
+		   (uck = UCTransUniCharStr(replace_buf, 60, clong,
 					    context->html_char_set,
 					    0) >= 0)) { 
 	    /*
@@ -1401,7 +1429,8 @@ top1:
     **  Handle possible named entity.
     */
     case S_entity:
-	if (unsign_c < 127 && isalnum((unsigned char)c)) {
+	if (unsign_c < 127 && (string->size ?
+		  isalnum((unsigned char)c) : isalpha((unsigned char)c))) {
 	    /*
 	    **  Accept valid ASCII character. - FM
 	    */
@@ -1610,16 +1639,6 @@ top1:
 			context->state = S_text;
 			goto top1;
 		    }
-		} else if (value == 160) {
-		    /*
-		    **  Use Lynx special character for 160 (nbsp). - FM
-		    */
-		    PUTC(HT_NON_BREAK_SPACE);
-		} else if (value == 173) {
-		    /*
-		    **  Use Lynx special character for 173 (shy) - FM
-		    */
-		    PUTC(LY_SOFT_HYPHEN);
 		} else if (value < 161 || HTPassEightBitNum ||
 			   !strncmp(LYchar_set_names[current_char_set],
 			   	    "ISO Latin 1", 11)) {
@@ -1712,7 +1731,8 @@ top1:
     **  Tag
     */	    
     case S_tag:					/* new tag */
-	if (unsign_c < 127 && isalnum((unsigned char)c)) {
+	if (unsign_c < 127 && (string->size ?
+		  isalnum((unsigned char)c) : isalpha((unsigned char)c))) {
 	    /*
 	    **  Add valid ASCII character. - FM
 	    */
@@ -1728,9 +1748,16 @@ top1:
 	    context->first_bracket = FALSE;
 	    HTChunkPutc(string, c);
 	    break;
-        } else if (!string->size && (WHITE(c) || c == '=')) {/* <WHITE or <= */
+        } else if (!string->size &&
+		   (unsign_c <= 160 &&
+		    (c != '/' && c != '?' && c != '_' && c != ':'))) {
 	    /*
-	    **  Recover the '<' and WHITE or '=' character. - FM & KW
+	    **  '<' must be followed by an ASCII letter to be a valid
+	    **  start tag.  Here it isn't, nor do we have a '/' for an
+	    **  end tag, nor one of some other characters with a
+	    **  special meaning for SGML or which are likely to be legal
+	    **  Name Start characters in XML or some other extension.
+	    **  So recover the '<' and following character as data. - FM & KW
 	    */
 	    context->state = S_text;
 	    PUTC('<');
@@ -1750,29 +1777,35 @@ top1:
 	    HTChunkTerminate(string) ;
 
 	    t = SGMLFindTag(dtd, string->data);
-	    if (!t) {
-	        if (c == ':' && 0 == strcasecomp(string->data, "URL")) {
-		    /*
-		    **  Treat <URL: as text rather than a junk tag,
-		    **  so we display it and the URL (Lynxism 8-). - FM
-		    */
-		    int i;
-		    PUTC('<');
-		    for (i = 0; i < 3; i++)	/* recover */
-		        PUTC(string->data[i]);
-		    PUTC(c);
-		    if (TRACE)
-		        fprintf(stderr, "SGML: Treating <%s%c as text\n",
-		    			string->data, c);
-		    string->size = 0;
-		    context->state = S_text;	
-		} else {
-		    if (TRACE)
-		        fprintf(stderr, "SGML: *** Unknown element %s\n",
-		    			string->data);
-		    context->state = (c == '>') ? S_text : S_junk_tag;
-		}
+	    if (t == context->unknown_tag && c == ':' &&
+		0 == strcasecomp(string->data, "URL")) {
+		/*
+		**  Treat <URL: as text rather than a junk tag,
+		**  so we display it and the URL (Lynxism 8-). - FM
+		*/
+		int i;
+		PUTC('<');
+		for (i = 0; i < 3; i++)	/* recover */
+		    PUTC(string->data[i]);
+		PUTC(c);
+		if (TRACE)
+		    fprintf(stderr, "SGML: Treating <%s%c as text\n",
+			    string->data, c);
+		string->size = 0;
+		context->state = S_text;
 		break;
+	    } else if (!t) {
+		if (TRACE)
+		    fprintf(stderr, "SGML: *** Invalid element %s\n",
+			    string->data);
+		context->state = (c == '>') ? S_text : S_junk_tag;
+		break;
+	    } else if (t == context->unknown_tag) {
+		if (TRACE)
+		    fprintf(stderr, "SGML: *** Unknown element %s\n",
+			    string->data);
+		/*  Fall through and treat like valid tag for attribute
+		    parsing - kw */
 	    }
 	    context->current_tag = t;
 	    
@@ -2153,6 +2186,18 @@ top1:
 		break;
 	    }
 	    else context->state = S_tag_gap;
+#ifdef EXP_CHARTRANS
+	} else if (context->T.decode_utf8 &&
+		*context->utf_buf) {
+	    HTChunkPuts(string, context->utf_buf);
+	    context->utf_buf_p = context->utf_buf;
+	    *(context->utf_buf_p) = '\0';
+	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
+				      context->T.trans_from_uni)) {
+	    HTChunkPutUtf8Char(string, clong);
+	} else if (saved_char_in && context->T.use_raw_char_in) {
+	    HTChunkPutc(string, saved_char_in);
+#endif /* EXP_CHARTRANS */
 	} else {
 	    HTChunkPutc(string, c);
 	}
@@ -2171,6 +2216,18 @@ top1:
 	    */
 	    context->state = S_esc_sq;
 	    HTChunkPutc(string, c);
+#ifdef EXP_CHARTRANS
+	} else if (context->T.decode_utf8 &&
+		*context->utf_buf) {
+	    HTChunkPuts(string, context->utf_buf);
+	    context->utf_buf_p = context->utf_buf;
+	    *(context->utf_buf_p) = '\0';
+	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
+				      context->T.trans_from_uni)) {
+	    HTChunkPutUtf8Char(string, clong);
+	} else if (saved_char_in && context->T.use_raw_char_in) {
+	    HTChunkPutc(string, saved_char_in);
+#endif /* EXP_CHARTRANS */
 	} else {
 	    HTChunkPutc(string, c);
 	}
@@ -2193,6 +2250,18 @@ top1:
 	    */
 	    context->state = S_esc_dq;
 	    HTChunkPutc(string, c);
+#ifdef EXP_CHARTRANS
+	} else if (context->T.decode_utf8 &&
+		*context->utf_buf) {
+	    HTChunkPuts(string, context->utf_buf);
+	    context->utf_buf_p = context->utf_buf;
+	    *(context->utf_buf_p) = '\0';
+	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
+				      context->T.trans_from_uni)) {
+	    HTChunkPutUtf8Char(string, clong);
+	} else if (saved_char_in && context->T.use_raw_char_in) {
+	    HTChunkPutc(string, saved_char_in);
+#endif /* EXP_CHARTRANS */
 	} else {
 	    HTChunkPutc(string, c);
 	}
@@ -2210,7 +2279,7 @@ top1:
 	    } else {
 		t = SGMLFindTag(dtd, string->data);
 	    }
-	    if (!t) {
+	    if (!t || t == context->unknown_tag) {
 		if (TRACE)
 		    fprintf(stderr, "Unknown end tag </%s>\n", string->data); 
 	    } else {
@@ -2589,6 +2658,7 @@ PUBLIC HTStream* SGML_new  ARGS3(
     context->target = target;
     context->actions = (HTStructuredClass*)(((HTStream*)target)->isa);
     					/* Ugh: no OO */
+    context->unknown_tag = &HTTag_unrecognized;
     context->state = S_text;
     context->element_stack = 0;			/* empty */
 #ifdef CALLERDATA		  
diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h
index 42524cbe..a3ea248a 100644
--- a/WWW/Library/Implementation/SGML.h
+++ b/WWW/Library/Implementation/SGML.h
@@ -207,6 +207,7 @@ typedef struct _HTStructuredClass{
                 int             element_number,
                 CONST BOOL*     attribute_present,
                 CONST char**    attribute_value,
+		int		charset,
 		char **		include));
                 
         void (*end_element) PARAMS((
diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h
index ac7a5439..ffe0a652 100644
--- a/WWW/Library/Implementation/UCAux.h
+++ b/WWW/Library/Implementation/UCAux.h
@@ -4,6 +4,7 @@
 extern BOOL UCCanUniTranslateFrom PARAMS((int from));
 extern BOOL UCCanTranslateUniTo PARAMS((int to));
 extern BOOL UCCanTranslateFromTo PARAMS((int from, int to));
+extern BOOL UCNeedNotTranslate PARAMS((int from, int to));
 
 struct _UCTransParams
 {
diff --git a/WWW/Library/Implementation/UCDefs.h b/WWW/Library/Implementation/UCDefs.h
index bbfdc33a..876fc075 100644
--- a/WWW/Library/Implementation/UCDefs.h
+++ b/WWW/Library/Implementation/UCDefs.h
@@ -57,7 +57,7 @@ typedef struct _LYUCcharset {
 #define UCT_R_8BIT UCT_R_LAT1 | UCT_R_HIGHCTRL /* full 8bit range */
 
 /*
- *  For the following some coments are in HTAnchor.c.
+ *  For the following some comments are in HTAnchor.c.
  */
 #define UCT_STAGE_MIME 0
 #define UCT_STAGE_PARSER 1	/* What the parser (SGML.c) gets to see */
@@ -67,9 +67,10 @@ typedef struct _LYUCcharset {
 
 #define UCT_SETBY_NONE 0
 #define UCT_SETBY_DEFAULT 1
-#define UCT_SETBY_STRUCTURED 2	/* structured stream stage */
-#define UCT_SETBY_PARSER 3	/* set by SGML parser or similar */
-#define UCT_SETBY_MIME 4	/* set explicitly by MIME charset parameter */
+#define UCT_SETBY_LINK 2	/* set by A or LINK CHARSET= hint */
+#define UCT_SETBY_STRUCTURED 3	/* structured stream stage (HTML.c) */
+#define UCT_SETBY_PARSER 4	/* set by SGML parser or similar */
+#define UCT_SETBY_MIME 5	/* set explicitly by MIME charset parameter */
 
 typedef struct _UCStageInfo
 {
diff --git a/WWW/Library/Implementation/UCMap.h b/WWW/Library/Implementation/UCMap.h
index de196752..017ebc92 100644
--- a/WWW/Library/Implementation/UCMap.h
+++ b/WWW/Library/Implementation/UCMap.h
@@ -17,6 +17,7 @@ extern int UCTransChar PARAMS((
 	char		ch_in,
 	int		charset_in,
 	int		charset_out));
+PUBLIC int UCReverseTransChar PARAMS((char ch_out, int charset_in, int charset_out));
 extern int UCTransCharStr PARAMS((
 	char *		outbuf,
 	int		buflen,
diff --git a/WWW/Library/Implementation/tcp.h b/WWW/Library/Implementation/tcp.h
index e94bc0eb..d8c723fe 100644
--- a/WWW/Library/Implementation/tcp.h
+++ b/WWW/Library/Implementation/tcp.h
@@ -65,6 +65,7 @@ typedef struct sockaddr_in SockA;  /* See netinet/in.h */
 #define STDIO_H
 #endif /* !STDIO_H */
 
+#ifndef VMS
 #include <sys/types.h>
 
 #if HAVE_DIRENT_H
@@ -84,7 +85,8 @@ typedef struct sockaddr_in SockA;  /* See netinet/in.h */
 # if HAVE_NDIR_H
 #  include <ndir.h>
 # endif
-#endif
+#endif /* HAVE_DIRENT_H */
+#endif /* !VMS */
 
 #if TIME_WITH_SYS_TIME
 # include <sys/time.h>
diff --git a/WWW/Library/unix/Makefile b/WWW/Library/unix/Makefile
index e894af92..5fea7dd7 100644
--- a/WWW/Library/unix/Makefile
+++ b/WWW/Library/unix/Makefile
@@ -8,7 +8,7 @@ WWW_MACH = unix
 ASIS_MACH = hardware/os
 
 
-CFLAGS =  -O -DDEBUG
+CFLAGS =  -g -DDEBUG
 LFLAGS =
 CC = cc
 
diff --git a/WWW/Library/vms/descrip.mms b/WWW/Library/vms/descrip.mms
index b178e538..d34fe347 100644
--- a/WWW/Library/vms/descrip.mms
+++ b/WWW/Library/vms/descrip.mms
@@ -176,9 +176,9 @@ CC = gcc
 !	HTVMSUtils.h, ufc-crypt.h, patchlevel.h
 
 MODULES = HTParse, HTAccess, HTTP, HTFile, HTBTree, HTFTP, HTTCP, HTString, -
-	SGML, HTMLDTD, HTChunk, HTPlain, HTWriter, HTFWriter, HTMLGen, -
+	SGML, HTMLDTD, HTChunk, HTPlain, HTWriter, HTMLGen, -
 	HTAtom, HTAnchor, HTStyle, HTList, HTAlert, HTRules, HTFormat, -
-	HTInit, HTMIME, HTHistory, HTNews, HTGopher, HTTelnet, HTFinger, -
+	HTMIME, HTHistory, HTNews, HTGopher, HTTelnet, HTFinger, -
 	HTWSRC, HTAAUtil, HTAABrow, HTAAServ, HTAAFile, HTPasswd, HTGroup, -
 	HTACL, HTAuth, HTAAProt, HTAssoc, HTLex, HTUU, HTVMSUtils, getpass, -
 	getline, crypt, crypt_util, HTWAIS, HTVMS_WaisUI, HTVMS_WaisProt
@@ -218,11 +218,9 @@ clean :
 !HTMLDTD.obj :	HTMLDTD.c HTMLDTD.h SGML.h
 !HTPlain.obj :	HTPlain.c HTPlain.h HTStream.h
 !HTWriter.obj :	HTWriter.c HTWriter.h HTStream.h
-!HTFWriter.obj :	HTFWriter.c HTFWriter.h HTStream.h
 !HTMLGen.obj :	HTMLGen.c HTMLGen.h HTUtils.h HTMLDTD.h
 !HTAlert.obj :	HTAlert.c HTAlert.h HTUtils.h Version.make
 !HTRules.obj :	HTRules.c HTRules.h HTUtils.h Version.make
-!HTInit.obj :	HTInit.c HTInit.h HTUtils.h HTList.h
 !HTMIME.obj :	HTMIME.c HTMIME.h HTUtils.h HTList.h
 !HTTelnet.obj :	HTTelnet.c HTTelnet.h HTUtils.h
 !HTWAIS.obj :	HTWAIS.c HTWAIS.h HTUtils.h HTList.h
diff --git a/WWW/Library/vms/libmake.com b/WWW/Library/vms/libmake.com
index 75cb23de..7d812cfd 100644
--- a/WWW/Library/vms/libmake.com
+++ b/WWW/Library/vms/libmake.com
@@ -137,7 +137,6 @@ $ cc [-.Implementation]HTMLDTD.c
 $ cc [-.Implementation]HTChunk.c
 $ cc [-.Implementation]HTPlain.c
 $ cc [-.Implementation]HTWriter.c
-$ cc [-.Implementation]HTFWriter.c
 $ cc [-.Implementation]HTMLGen.c
 $ cc [-.Implementation]HTAtom.c
 $ cc [-.Implementation]HTAnchor.c
@@ -146,7 +145,6 @@ $ cc [-.Implementation]HTList.c
 $ cc [-.Implementation]HTAlert.c
 $ cc [-.Implementation]HTRules.c
 $ cc [-.Implementation]HTFormat.c
-$ cc [-.Implementation]HTInit.c
 $ cc [-.Implementation]HTMIME.c
 $ cc [-.Implementation]HTHistory.c
 $ cc [-.Implementation]HTNews.c
author	Thomas E. Dickey <dickey@invisible-island.net>	1997-10-06 04:08:00 -0400
committer	Thomas E. Dickey <dickey@invisible-island.net>	1997-10-06 04:08:00 -0400
commit	1d80538b4b84eadd223c7b61839b950389c2d49d (patch)
tree	a46f327e82edb06d8d789b60c3395f873476e040 /WWW/Library
parent	443226a5ffcf805f6ab3ccbcc2a6b4802793b07d (diff)
download	lynx-snapshots-1d80538b4b84eadd223c7b61839b950389c2d49d.tar.gz