about summary refs log tree commit diff stats
path: root/WWW
diff options
context:
space:
mode:
Diffstat (limited to 'WWW')
-rw-r--r--WWW/Library/Implementation/HTChunk.c13
-rw-r--r--WWW/Library/Implementation/HTFTP.c28
-rw-r--r--WWW/Library/Implementation/HTFile.c16
-rw-r--r--WWW/Library/Implementation/HTMIME.c12
-rw-r--r--WWW/Library/Implementation/HTML.h44
-rw-r--r--WWW/Library/Implementation/HTMLDTD.c5
-rw-r--r--WWW/Library/Implementation/HTNews.c55
-rw-r--r--WWW/Library/Implementation/HTParse.c9
-rw-r--r--WWW/Library/Implementation/HTParse.h6
-rw-r--r--WWW/Library/Implementation/HTPlain.c226
-rw-r--r--WWW/Library/Implementation/HTString.c7
-rw-r--r--WWW/Library/Implementation/HTString.h4
-rw-r--r--WWW/Library/Implementation/HTTP.c36
-rw-r--r--WWW/Library/Implementation/SGML.c523
-rw-r--r--WWW/Library/Implementation/UCAux.h14
-rw-r--r--WWW/Library/Implementation/UCMap.h5
-rw-r--r--WWW/Library/unix/makefile.in3
17 files changed, 698 insertions, 308 deletions
diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c
index 497e5173..fd66a482 100644
--- a/WWW/Library/Implementation/HTChunk.c
+++ b/WWW/Library/Implementation/HTChunk.c
@@ -116,14 +116,15 @@ PUBLIC void HTChunkPutb ARGS3 (HTChunk *,ch, CONST char *,b, int,l)
     ch->size += l;
 }
 
-#ifdef EXP_CHARTRANS
-
 #define PUTC(code) ch->data[ch->size++] = (char)(code)
 #define PUTC2(code) ch->data[ch->size++] = (char)(0x80|(0x3f &(code)))
 
-PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code)
+PUBLIC void HTChunkPutUtf8Char ARGS2(
+	HTChunk *,	ch,
+	UCode_t,	code)
 {
     int utflen;
+
     if (code < 128)
 	utflen = 1;
     else if   (code <     0x800L) {
@@ -148,7 +149,7 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code)
           outofmem(__FILE__, "HTChunkPutUtf8Char");
     }
 
-    switch(utflen) {
+    switch (utflen) {
     case 0:
 	return;
     case 1:
@@ -169,7 +170,7 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code)
     case 6:
 	PUTC(0xfc | (code>>30));
     }
-    switch(utflen) {
+    switch (utflen) {
     case 6:
 	PUTC2(code>>24);
     case 5:
@@ -183,8 +184,6 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code)
     }
 }
 
-#endif /* EXP_CHARTRANS */
-
 /*	Terminate a chunk
 **	-----------------
 */
diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c
index 0e5b801b..701490e9 100644
--- a/WWW/Library/Implementation/HTFTP.c
+++ b/WWW/Library/Implementation/HTFTP.c
@@ -759,21 +759,21 @@ PRIVATE int get_connection ARGS2(
       }
     server_type = GENERIC_SERVER;	/* reset */
     if (status == 2) {		/* Send username */
-	{
-	    char *cp;		/* look at greeting text */
-	    if (strlen(response_text) > 4) {
-		if ((cp = strstr(response_text, " awaits your command")) ||
-		    (cp = strstr(response_text, " ready."))) {
-		    *cp = '\0';
-		}
-		cp = response_text + 4;
-		if (!strncasecomp(cp, "NetPresenz", 10))
-		    server_type = NETPRESENZ_SERVER;
-	    } else {
-		cp = response_text;
+	char *cp;		/* look at greeting text */
+
+	if (strlen(response_text) > 4) {
+	    if ((cp = strstr(response_text, " awaits your command")) ||
+		(cp = strstr(response_text, " ready."))) {
+		*cp = '\0';
 	    }
-	    StrAllocCopy(anchor->server, cp);
+	    cp = response_text + 4;
+	    if (!strncasecomp(cp, "NetPresenz", 10))
+		server_type = NETPRESENZ_SERVER;
+	} else {
+	    cp = response_text;
 	}
+	StrAllocCopy(anchor->server, cp);
+
 	if (username && *username) {
 	    command = (char*)malloc(10+strlen(username)+2+1);
 	    if (command == NULL)
@@ -2232,7 +2232,7 @@ PRIVATE EntryInfo * parse_dir_entry ARGS2(
     return(entry_info);
 } /* parse_dir_entry */
 
-PUBLIC int compare_EntryInfo_structs ARGS2(
+PRIVATE int compare_EntryInfo_structs ARGS2(
 	EntryInfo *,	entry1, 
 	EntryInfo *,	entry2)
 {
diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c
index 45bd6464..6ef0fde1 100644
--- a/WWW/Library/Implementation/HTFile.c
+++ b/WWW/Library/Implementation/HTFile.c
@@ -873,16 +873,6 @@ PUBLIC HTFormat HTCharsetFormat ARGS3(
 	*cp4 = '\0';
 	cp4 = cp3;
 	chndl = UCGetLYhndl_byMIME(cp3);
-	if (chndl < 0) {
-	    if (!strcmp(cp4, "cn-big5")) {
-		cp4 += 3;
-		chndl = UCGetLYhndl_byMIME(cp4);
-	    } else if (!strncmp(cp4, "cn-gb", 5)) {
-		StrAllocCopy(cp3, "gb2312");
-		cp4 = cp3;
-		chndl = UCGetLYhndl_byMIME(cp4);
-	    }
-	}
 	if (UCCanTranslateFromTo(chndl, current_char_set)) {
 	    chartrans_ok = YES;
 	    *cp1 = '\0';
@@ -1010,7 +1000,7 @@ PUBLIC HTFormat HTCharsetFormat ARGS3(
 	    HTAlert(anchor->charset);
 	} else if (!strncmp(cp2, "koi8-r", 6) &&
 		   !strncmp(LYchar_set_names[current_char_set],
-			    "KOI8-R character set", 20)) {
+			    "KOI8-R Cyrillic", 15)) {
 	    *cp1 = '\0';
 	    format = HTAtom_for(cp);
 	    StrAllocCopy(anchor->charset, "koi8-r");
@@ -2446,7 +2436,7 @@ PUBLIC int HTLoadFile ARGS4(
 		return status;
 	    }  /* If succesfull open */
 	    FREE(localname);
-	}    /* scope of fp */
+	}  /* scope of fp */
     }  /* local unix file system */    
 #endif /* !NO_UNIX_IO */
 #endif /* VMS */
@@ -2487,8 +2477,6 @@ PUBLIC int HTLoadFile ARGS4(
 
 	return HTLoadError(sink, 403, "Can't access requested file.");
     }
-    
- 
 }
 
 /*
diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c
index ecd61ac2..b694bb05 100644
--- a/WWW/Library/Implementation/HTMIME.c
+++ b/WWW/Library/Implementation/HTMIME.c
@@ -377,16 +377,6 @@ PRIVATE void HTMIME_put_character ARGS2(
 			    *cp4 = '\0';
 			    cp4 = cp3;
 			    chndl = UCGetLYhndl_byMIME(cp3);
-			    if (chndl < 0) {
-				if (!strcmp(cp4, "cn-big5")) {
-				    cp4 += 3;
-				    chndl = UCGetLYhndl_byMIME(cp4);
-				} else if (!strncmp(cp4, "cn-gb", 5)) {
-				    StrAllocCopy(cp3, "gb2312");
-				    cp4 = cp3;
-				    chndl = UCGetLYhndl_byMIME(cp4);
-				}
-			    }
 			    if (UCCanTranslateFromTo(chndl,
 						     current_char_set)) {
 				chartrans_ok = YES;
@@ -535,7 +525,7 @@ PRIVATE void HTMIME_put_character ARGS2(
 			    } else if
 			       (!strncmp(cp2, "koi8-r", 6) &&
 				!strncmp(LYchar_set_names[current_char_set],
-					 "KOI8-R character set", 20)) {
+					 "KOI8-R Cyrillic", 15)) {
 				*cp1 = '\0';
 				me->format = HTAtom_for(cp);
 				StrAllocCopy(me->anchor->charset,
diff --git a/WWW/Library/Implementation/HTML.h b/WWW/Library/Implementation/HTML.h
index f1653cf1..42bf4638 100644
--- a/WWW/Library/Implementation/HTML.h
+++ b/WWW/Library/Implementation/HTML.h
@@ -146,12 +146,46 @@ struct _HTStructured {
 
     BOOL		needBoldH;
 
-#ifdef EXP_CHARTRANS
-    LYUCcharset	* UCI;	/* pointer to node_anchor's UCInfo */
-    int	UCLYhndl;		/* tells us what charset we are fed */
-    UCTransParams T;
+    /*
+    **  UCI and UCLYhndl give the UCInfo and charset registered for
+    **  the HTML parser in the node_anchor's UCStages structure.  It
+    **  indicates what is fed to the HTML parser as the stream of character
+    **  data (not necessarily tags and attributes).  It should currently
+    **  always be set to be the same as UCI and UCLYhndl for the HTEXT stage
+    **  in the node_anchor's UCStages structure, since the HTML parser sends
+    **  its input character data to the output without further charset
+    **  translation.
+    */
+    LYUCcharset	*	UCI;
+    int			UCLYhndl;
+    /*
+    **  inUCI and inUCLYhndl indicate the UCInfo and charset which the
+    **  HTML parser treats as the input charset.  It is normally set
+    **  to the UCI and UCLYhndl for the SGML parser in the node_anchor's
+    **  UCStages structure (which may be a dummy, based on the MIME
+    **  parser's UCI and UCLYhndl in that structure, when we are handling
+    **  a local file or non-http(s) gateway).  It could be changed
+    **  temporarily by the HTML parser, for conversions of attribute
+    **  strings, but should be reset once done. - FM
+    */
+    LYUCcharset	*	inUCI;
+    int			inUCLYhndl;
+    /*
+    **  outUCI and outUCLYhndl indicate the UCInfo and charset which
+    **  the HTML parser treats as the output charset.  It is normally
+    **  set to its own UCI and UCLYhndl.  It could be changed for
+    **  conversions of attribute strings, but should be reset once
+    **  done. - FM
+    */
+    LYUCcharset	*	outUCI;
+    int			outUCLYhndl;
+    /*
+    **  T holds the transformation rules for conversions of strings
+    **  between the input and output charsets by the HTML parser. - FM
+    */
+    UCTransParams	T;
+
     int 		tag_charset; /* charset for attribute values etc. */
-#endif
 };
 
 struct _HTStream {
diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c
index cbeb35f9..e2ac932d 100644
--- a/WWW/Library/Implementation/HTMLDTD.c
+++ b/WWW/Library/Implementation/HTMLDTD.c
@@ -440,9 +440,8 @@ static CONST UC_entity_info extra_entities[] = {
   {"zcaron",	0x017e},  /* z with caron */
   {"zdot",	0x017c},  /* z with dot above */
   {"zeta",	0x03b6},  /* greek small letter zeta,  u+03B6 ISOgrk3 -- */
-  {"zwj",	 8205},	/* zero width joiner */ 
-  {"zwnj",	 8204},	/* zero width non-joiner */ 
-
+  {"zwj",	  8205},  /* zero width joiner */ 
+  {"zwnj",	  8204},  /* zero width non-joiner */ 
 };
 #endif /* EXP_CHARTRANS */
 
diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c
index adc9568b..bb3481d9 100644
--- a/WWW/Library/Implementation/HTNews.c
+++ b/WWW/Library/Implementation/HTNews.c
@@ -1010,11 +1010,11 @@ PRIVATE int read_article NOARGS
 		}
 
 		if (full_line[0] == '.') {	
-		    if (full_line[1] < ' ') {		/* End of article? */
+		    if ((unsigned char)full_line[1] < ' ') {		/* End of article? */
 			done = YES;
 			break;
 		    }
-		} else if (full_line[0] < ' ') {
+		} else if ((unsigned char)full_line[0] < ' ') {
 		    break;		/* End of Header? */
 
 		} else if (match(full_line, "SUBJECT:")) {
@@ -1273,7 +1273,7 @@ PRIVATE int read_article NOARGS
 	    if (TRACE)
 	        fprintf(stderr, "B %s", line);
 	    if (line[0] == '.') {
-		if (line[1] < ' ') {		/* End of article? */
+		if ((unsigned char)line[1] < ' ') {		/* End of article? */
 		    done = YES;
 		    break;
 		} else {			/* Line starts with dot */
@@ -1411,6 +1411,7 @@ PRIVATE int read_list ARGS1(char *, arg)
     BOOL done = NO;
     BOOL head = NO;
     BOOL tail = NO;
+    BOOL skip_this_line = NO, skip_rest_of_line = NO;
     int listing = 0;
     char *pattern = NULL;
     int len = 0;
@@ -1455,7 +1456,7 @@ PRIVATE int read_list ARGS1(char *, arg)
     START(HTML_DLC);
     PUTC('\n');
     while (!done) {
-	char ch = *p++ = NEXT_CHAR;
+	char ch = NEXT_CHAR;
 	if (ch == (char)EOF) {
 	    if (interrupted_in_htgetcharacter) {
 		interrupted_in_htgetcharacter = 0;
@@ -1470,13 +1471,47 @@ PRIVATE int read_list ARGS1(char *, arg)
 	    abort_socket();	/* End of file, close socket */
 	    FREE(pattern);
 	    return(HT_LOADED);	/* End of file on response */
+	} else if (skip_this_line) {
+	    if (ch == LF) {
+		skip_this_line = skip_rest_of_line = NO;
+		p = line;
+	    }
+	    continue;
+	} else if (skip_rest_of_line) {
+	    if (ch != LF) {
+		continue;
+	    }
+	} else if (p == &line[LINE_LENGTH]) {
+	    if (TRACE) {
+	        fprintf(stderr, "b %.*s%c[...]\n", (LINE_LENGTH), line, ch);
+	    }
+	    *p = '\0';
+	    if (ch == LF) {
+		;		/* Will be dealt with below */
+	    } else if (WHITE(ch)) {
+		ch = LF;	/* May treat as line without description */
+		skip_this_line = YES; /* ...and ignore until LF */
+	    } else if (strchr(line, ' ') == NULL &&
+		       strchr(line, '\t') == NULL) {
+		/* No separator found */
+		if (TRACE)
+		    fprintf(stderr,
+			    "HTNews..... group name too long, discarding.\n");
+		skip_this_line = YES; /* ignore whole line */
+		continue;
+	    } else {
+		skip_rest_of_line = YES; /* skip until ch == LF found */
+	    }
+	} else {
+	    *p++ = ch;
 	}
-	if ((ch == LF) || (p == &line[LINE_LENGTH])) {
-	    *p++ = '\0';			/* Terminate the string */
+	if (ch == LF) {
+	    skip_rest_of_line = NO; /* done, reset flag */
+	    *p = '\0';			/* Terminate the string */
 	    if (TRACE)
 	        fprintf(stderr, "B %s", line);
 	    if (line[0] == '.') {
-		if ((unsigned char)line[1] < ' ') {		/* End of article? */
+		if ((unsigned char)line[1] < ' ') {		/* End of list? */
 		    done = YES;
 		    break;
 		} else {			/* Line starts with dot */
@@ -1670,11 +1705,11 @@ PRIVATE int read_group ARGS3(
 		    return(HT_LOADED);	/* End of file on response */
 		}
 		if ((ch == '\n') || (p == &line[LINE_LENGTH])) {
-		    *p++ = '\0';		/* Terminate the string */
+		    *p = '\0';		/* Terminate the string */
 		    if (TRACE)
 		        fprintf(stderr, "X %s", line);
 		    if (line[0] == '.') {
-			if (line[1] < ' ') {	/* End of article? */
+			if (line[1] < ' ') {	/* End of response? */
 			    done = YES;
 			    break;
 			} else {		/* Line starts with dot */
@@ -1782,7 +1817,7 @@ PRIVATE int read_group ARGS3(
 			switch(line[0]) {
 
 			case '.':
-			    done = (line[1] < ' ');	/* End of article? */
+			    done = ((unsigned char)line[1] < ' ');  /* End of response? */
 			    break;
 
 			case 'S':
diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c
index 6f558ef5..ab5149b0 100644
--- a/WWW/Library/Implementation/HTParse.c
+++ b/WWW/Library/Implementation/HTParse.c
@@ -65,7 +65,7 @@ PRIVATE void scan ARGS2(
     char * p;
 #ifdef NOTDEFINED
     int length = strlen(name);
-#endif
+#endif /* NOTDEFINED */
     
     parts->access = NULL;
     parts->host = NULL;
@@ -89,7 +89,7 @@ PRIVATE void scan ARGS2(
     }
     
 #ifdef NOTDEFINED
-    for (p = (name + length-1); p >= name; p--) {
+    for (p = (name + length-1); p >= name; p--) {}
 #endif /* NOTDEFINED */
     /*
     **  Scan left-to-right for a fragment (anchor).
@@ -699,7 +699,10 @@ PUBLIC char * HTUnEscape ARGS1(
 
     while (*p != '\0') {
         if (*p == HEX_ESCAPE &&
-	    p[1] && p[2] &&	/* tests shouldn't be needed, but.. */
+	    /*
+	     *  Tests shouldn't be needed, but better safe than sorry.
+	     */
+	    p[1] && p[2] &&
 	    isxdigit((unsigned char)p[1]) &&
 	    isxdigit((unsigned char)p[2])) {
 	    p++;
diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h
index 63c84739..2f77f079 100644
--- a/WWW/Library/Implementation/HTParse.h
+++ b/WWW/Library/Implementation/HTParse.h
@@ -154,9 +154,11 @@ extern char * HTUnEscapeSome PARAMS((
 	char *		str,
 	CONST char *	do_trans));
 
-/* Turn a string which is not a RFC 822 token into a quoted-string - kw */
+/*
+**  Turn a string which is not a RFC 822 token into a quoted-string. - KW
+*/
 extern void HTMake822Word PARAMS((
-	char **	str));
+	char **		str));
 
 #endif  /* HTPARSE_H */
 
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c
index c95f55a8..277a6ebf 100644
--- a/WWW/Library/Implementation/HTPlain.c
+++ b/WWW/Library/Implementation/HTPlain.c
@@ -8,6 +8,7 @@
 **		strings written must be less than buffer size.
 */
 #include "HTUtils.h"
+#include "tcp.h"
 
 #include "HTPlain.h"
 
@@ -32,10 +33,8 @@ extern HTStyleSheet * styleSheet;
 extern int current_char_set;
 extern CONST char * LYchar_set_names[];
 extern CONST char **LYCharSets[];
-#ifdef EXP_CHARTRANS
 extern int LYlowest_eightbit[];
 extern BOOLEAN LYRawMode;
-#endif /* EXP_CHARTRANS */
 extern CONST char * HTMLGetEntityName PARAMS((int i));
 extern BOOL HTPassEightBitRaw;
 extern BOOL HTPassHighCtrlRaw;
@@ -47,24 +46,19 @@ PUBLIC int HTPlain_lastraw = -1;
 **		-----------
 */
 struct _HTStream {
-	CONST HTStreamClass *	isa;
-
-	HText * 		text;
-#ifdef EXP_CHARTRANS
+    CONST HTStreamClass *	isa;
+    HText *			text;
     LYUCcharset	* UCI;	/* pointer to node_anchor's UCInfo */
     int	in_char_set;		/* tells us what charset we are fed */
-    int	htext_char_set;		/* what charset feed to HText */
-    char                utf_count;
-    long                utf_char;
-    char	utf_buf[7];
-    char *	utf_buf_p;
-    UCTransParams T;
-#endif /* EXP_CHARTRANS */
+    int	htext_char_set;		/* what charset we feed to HText */
+    char			utf_count;
+    UCode_t			utf_char;
+    char 			utf_buf[8];
+    char *			utf_buf_p;
+    UCTransParams		T;
 };
 
-#ifdef EXP_CHARTRANS
-
-PRIVATE char replace_buf [61];        /* buffer for replacement strings */
+PRIVATE char replace_buf [64];        /* buffer for replacement strings */
 
 PRIVATE void HTPlain_getChartransInfo ARGS2(
 	HTStream *,		me,
@@ -88,7 +82,6 @@ PRIVATE void HTPlain_getChartransInfo ARGS2(
     }
     me->UCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
 }
-#endif /* EXP_CHARTRANS */
 
 /*	Write the buffer out to the socket
 **	----------------------------------
@@ -140,13 +133,14 @@ PRIVATE void HTPlain_put_character ARGS2(
     HTPlain_lastraw = c;
     if (c == '\r') {
 	HText_appendCharacter(me->text, '\n');
-#ifdef EXP_CHARTRANS
-      /* for now don't repeat everything here that has been done below - kw*/
-    } else if ((unsigned char)c >= 127) {
-	HTPlain_write(me, &c, 1);
-#endif
     } else if (HTCJK != NOCJK) {
 	HText_appendCharacter(me->text, c);
+    } else if ((unsigned char)c >= 127) {
+	/*
+	**  For now, don't repeat everything here
+	**  that has been done below - KW
+	*/
+	HTPlain_write(me, &c, 1);
     } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 &&
     	       HTPassHighCtrlRaw) {
 	HText_appendCharacter(me->text, c);
@@ -159,7 +153,7 @@ PRIVATE void HTPlain_put_character ARGS2(
 	HText_appendCharacter(me->text, c);
     } else if ((unsigned char)c > 160) {
 	if (!HTPassEightBitRaw &&
-	    strncmp(LYchar_set_names[current_char_set], "ISO Latin 1", 11)) {
+	    current_char_set != 0) {
 	    int len, high, low, i, diff = 1;
 	    CONST char * name;
 	    int value = (int)((unsigned char)c - 160);
@@ -217,11 +211,10 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 {
     CONST char * p;
     CONST char * e = s+l;
-#ifdef EXP_CHARTRANS
     BOOL chk;
-    long unsign_c, uck;
+    UCode_t code;
+    long uck;
     char c_p;
-#endif /* EXP_CHARTRANS */
 
     for (p = s; p < e; p++) {
 #ifdef REMOVE_CR_ONLY
@@ -247,8 +240,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	    HText_appendCharacter(me->text, '\n');
 	    continue;
 	}
-#ifdef EXP_CHARTRANS
-	unsign_c = (unsigned char)(*p);
+	code = (unsigned char)*p;
 	c_p = *p;
 
 	if (me->T.decode_utf8) {
@@ -257,20 +249,31 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	    **  Incomplete characters silently ignored.
 	    **  from Linux kernel's console.c
 	    */
-	    if ((unsigned char)(*p) > 0x7f) {
+	    if ((unsigned char)(*p) > 127) {
 		if (me->utf_count > 0 && (*p & 0xc0) == 0x80) {
 		    me->utf_char = (me->utf_char << 6) | (*p & 0x3f);
 		    me->utf_count--;
-		    *(me->utf_buf_p++) = *p;
+		    *(me->utf_buf_p) = *p;
+		    (me->utf_buf_p)++;
 		    if (me->utf_count == 0) {
+		        /*
+			**  Got a complete multibyte character.
+			*/
 			*(me->utf_buf_p) = '\0';
-			unsign_c = me->utf_char;
-			if (unsign_c<256) c_p = (char)unsign_c;
+			code = me->utf_char;
+			if (code < 256) {
+			    c_p = FROMASCII((char)code);
+			}
+		    } else {
+			continue;  /* iterate for more */
 		    }
-		    else continue;  /* iterate for more */
 		} else {
+		    /*
+		    **  Accumulate a multibyte character.
+		    */
 		    me->utf_buf_p = me->utf_buf;
-		    *(me->utf_buf_p++) = *p;
+		    *(me->utf_buf_p) = *p;
+		    (me->utf_buf_p)++;
 		    if ((*p & 0xe0) == 0xc0) {
 			me->utf_count = 1;
 			me->utf_char = (*p & 0x1f);
@@ -286,14 +289,20 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    } else if ((*p & 0xfe) == 0xfc) {
 			me->utf_count = 5;
 			me->utf_char = (*p & 0x01);
-		    } else { /* garbage */
+		    } else {
+			/*
+			 *  Garbage.
+			 */
 			me->utf_count = 0;
 			me->utf_buf_p = me->utf_buf;
 			*(me->utf_buf_p) = '\0';
 		    }
 		    continue; /* iterate for more */
 		}
-	    } else {	/* got an ASCII char */
+	    } else {
+	        /*
+		**  Got an ASCII character.
+		*/
 		me->utf_count = 0;
 		me->utf_buf_p = me->utf_buf;
 		*(me->utf_buf_p) = '\0';
@@ -301,25 +310,21 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	}
 	
 	if (me->T.trans_to_uni &&
-	    (unsign_c >= 127 ||
-	     (unsign_c < 32 && unsign_c != 0 && me->T.trans_C0_to_uni))) {
-	    unsign_c = UCTransToUni(c_p, me->in_char_set);
-	    if (unsign_c > 0) {
-		if (unsign_c < 256) {
-		    c_p = (char)unsign_c;
+	    (code >= 127 ||
+	     (code < 32 && code != 0 && me->T.trans_C0_to_uni))) {
+	    code = UCTransToUni(c_p, me->in_char_set);
+	    if (code > 0) {
+		if (code < 256) {
+		    c_p = FROMASCII((char)code);
 		}
 	    }
 	}
 	/*
-	**  At this point we have either unsign_c in Unicode
-	**  (and c in latin1 if unsign_c is in the latin1 range),
-	**  or unsign_c and c will have to be passed raw.
+	**  At this point we have either code in Unicode
+	**  (and c in latin1 if code is in the latin1 range),
+	**  or code and c will have to be passed raw.
 	*/
 
-#else
-#define unsign_c (unsigned char)*p	
-#define c_p *p
-#endif /* EXP_CHARTRANS */
 	/*
 	**  If CJK mode is on, we'll assume the document matches
 	**  the user's selected character set, and if not, the
@@ -334,7 +339,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 #define PASSHI8BIT HTPassEightBitRaw
 #else
 #define PASSHICTRL (me->T.transp || \
-		    unsign_c >= LYlowest_eightbit[me->in_char_set])
+		    code >= LYlowest_eightbit[me->in_char_set])
 #define PASS8859SPECL me->T.pass_160_173_raw
 #define PASSHI8BIT (HTPassEightBitRaw || \
 		    (me->T.do_8bitraw && !me->T.trans_from_uni))
@@ -345,47 +350,75 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	**  document matches and pass 127-160 8-bit characters.  If it
 	**  doesn't match, the user should toggle raw/CJK mode off. - FM
 	*/
-	} else if (unsign_c >= 127 && unsign_c < 161 &&
+	} else if (code >= 127 && code < 161 &&
 		   PASSHICTRL && PASS8859SPECL) {
 	    HText_appendCharacter(me->text, *p);
-	} else if (unsign_c == 173 && PASS8859SPECL) {
+	} else if (code == 173 && PASS8859SPECL) {
 	    HText_appendCharacter(me->text, *p);
 	/*
 	**  If neither HTPassHighCtrlRaw nor CJK is set, play it safe
 	**  and treat 160 (nbsp) as an ASCII space (32). - FM
 	*/
-	} else if (unsign_c == 160) {
+	} else if (code == 160) {
 	    HText_appendCharacter(me->text, ' ');
 	/*
 	**  If neither HTPassHighCtrlRaw nor CJK is set, play it safe
 	**  and ignore 173 (shy). - FM
 	*/
-	} else if (unsign_c == 173) {
+	} else if (code == 173) {
 	    continue;
 	/*
 	**  If we get to here, pass the displayable ASCII characters. - FM
 	*/
-	} else if ((unsign_c >= 32 && unsign_c < 127) ||
-#ifdef EXP_CHARTRANS
-		   (PASSHI8BIT && c_p>=LYlowest_eightbit[me->htext_char_set])||
-#endif
+	} else if ((code >= 32 && code < 127) ||
+		   (PASSHI8BIT &&
+		    c_p >= LYlowest_eightbit[me->htext_char_set]) ||
 		   *p == '\n' || *p == '\t') {
 	    HText_appendCharacter(me->text, c_p);
 
-#ifdef EXP_CHARTRANS
 	} else if (me->T.use_raw_char_in) {
 	    HText_appendCharacter(me->text, *p);
+#ifdef NOTUSED_FOTEMODS
+	/*
+	**  Use an ASCII space (32) for ensp, emsp or thinsp. - FM 
+	*/
+	} else if (code == 8194 || code == 8195 || code == 8201) {
+	    HText_appendCharacter(me->text, ' ');
+	/*
+	**  Use ASCII hyphen for 8211 (ndash/endash)
+	**  or 8212 (mdash/emdash). - FM
+	*/
+	} else if (code == 8211 || code == 8212) {
+	    HText_appendCharacter(me->text, '-');
+	/*
+	**  Ignore 8204 (zwnj) or 8205 (zwj), for now. - FM
+	*/
+	} else if (code == 8204 || code == 8205) {
+	    if (TRACE) {
+		fprintf(stderr,
+			"HTPlain_write: Ignoring '%ld'.\n", code);
+	    }
+	/*
+	**  Ignore 8206 (lrm) or 8207 (rlm), for now. - FM
+	*/
+	} else if (code == 8206 || code == 8207) {
+	    if (TRACE) {
+		fprintf(stderr,
+			"HTPlain_write: Ignoring '%ld'.\n", code);
+	    }
+#endif /* NOTUSED_FOTEMODS */
+
 /******************************************************************
  *   I. LATIN-1 OR UCS2  TO  DISPLAY CHARSET
  ******************************************************************/  
-	} else if ((chk = (me->T.trans_from_uni && unsign_c >= 160)) &&
-		   (uck = UCTransUniChar(unsign_c,
+	} else if ((chk = (me->T.trans_from_uni && code >= 160)) &&
+		   (uck = UCTransUniChar(code,
 					 me->htext_char_set)) >= 32 &&
 		   uck < 256) {
 	    if (TRACE) {
 		fprintf(stderr,
-			"UCTransUniChar returned 0x%lx:'%c'.\n",
-			uck, (char)uck);
+			"UCTransUniChar returned 0x%.2lX:'%c'.\n",
+			uck, FROMASCII((char)uck));
 	    }
 	    HText_appendCharacter(me->text, (char)(uck & 0xff));
 	} else if (chk &&
@@ -394,7 +427,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		   /*
 		   **  Not found; look for replacement string.
 		   */
-		   (uck = UCTransUniCharStr(replace_buf,60, unsign_c,
+		   (uck = UCTransUniCharStr(replace_buf, 60, code,
 					    me->htext_char_set, 0) >= 0)) { 
 	    /*
 	    **  No further tests for valididy - assume that whoever
@@ -405,11 +438,20 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	**  If we get to here, and should have translated,
 	**  translation has failed so far.  
 	*/
-	} else if (chk && unsign_c > 127 && me->T.output_utf8 &&
-		   *me->utf_buf) {
-	    HText_appendText(me->text, me->utf_buf);
-	    me->utf_buf_p = me->utf_buf;
-	    *(me->utf_buf_p) = '\0';
+	} else if (chk && code > 127 && me->T.output_utf8) {
+	    /*
+	    **  We want UTF-8 output, so do it now. - FM
+	    */
+	    if (*me->utf_buf) {
+		HText_appendText(me->text, me->utf_buf);
+		me->utf_buf_p = me->utf_buf;
+		*(me->utf_buf_p) = '\0';
+	    } else if (UCConvertUniToUtf8(code, replace_buf)) {
+		HText_appendText(me->text, replace_buf);
+	    } else {
+		sprintf(replace_buf, "U%.2lX", code);
+		HText_appendText(me->text, replace_buf);
+	    }
 	} else if (me->T.strip_raw_char_in &&
 		   (unsigned char)*p >= 0xc0 &&
 		   (unsigned char)*p < 255) {
@@ -418,32 +460,30 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	    **  (somewhat) readable ASCII.
 	    */
 	    HText_appendCharacter(me->text, (char)(*p & 0x7f));
-	} else if (me->T.trans_from_uni && unsign_c > 255) {
+	} else if (me->T.trans_from_uni && code > 255) {
 	    if (PASSHI8BIT && PASSHICTRL && LYRawMode &&
 		(unsigned char)*p >= LYlowest_eightbit[me->htext_char_set]) {
 		HText_appendCharacter(me->text, *p);
 	    } else {
-		sprintf(replace_buf, "U%.2lx", unsign_c);
+		sprintf(replace_buf, "U%.2lX", code);
 		HText_appendText(me->text, replace_buf);
 	    }
-#endif /* EXP_CHARTRANS */
-
 	/*
 	**  If we get to here and HTPassEightBitRaw or the
 	**  selected character set is not "ISO Latin 1",
 	**  use the translation tables for 161-255 8-bit
 	**  characters (173 was handled above). - FM
 	*/
-	} else if (unsign_c > 160) {
-	    if (!HTPassEightBitRaw && unsign_c <= 255 &&
-		strncmp(LYchar_set_names[current_char_set],
-		   	"ISO Latin 1", 11)) {
+	} else if (code > 160) {
+	    if (!HTPassEightBitRaw && code <= 255 &&
+		me->htext_char_set != 0) {
 		/*
 		**  Attempt to translate. - FM
 		*/
 		int len, high, low, i, diff=1;
 		CONST char * name;
-		int value = (int)(unsign_c - 160);
+		int value = (int)(code - 160);
+
 		name = HTMLGetEntityName(value);
 		len =  strlen(name);
 		for(low = 0, high = HTML_dtd.number_of_entities;
@@ -454,7 +494,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    diff = strncmp(HTML_dtd.entity_names[i], name, len);
 		    if (diff == 0) {
 			HText_appendText(me->text,
-					 LYCharSets[current_char_set][i]);
+					 LYCharSets[me->htext_char_set][i]);
 			break;
 		    }
 		}
@@ -466,7 +506,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 		    */
 #ifdef EXP_CHARTRANS
 		    if (!PASSHI8BIT)
-			c_p = (char)unsign_c;
+			c_p = FROMASCII((char)code);
 		    if (me->T.output_utf8 &&
 			*me->utf_buf) {
 			HText_appendText(me->text, me->utf_buf);
@@ -474,7 +514,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 			*(me->utf_buf_p) = '\0';
 
 		    } else if (me->T.trans_from_uni) {
-			sprintf(replace_buf,"U%.2lx",unsign_c);
+			sprintf(replace_buf, "U%.2lX", code);
 			HText_appendText(me->text, replace_buf);
 		    } else
 #endif /* EXP_CHARTRANS */
@@ -484,15 +524,12 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 	        /*
 		**  Didn't attempt a translation. - FM
 		*/
-#ifdef EXP_CHARTRANS
-		    /*  either output as UTF8 or a hex representation or
-		    **  pass the raw character and hope it's OK.
-		    */
-		if (unsign_c <= 255 && !PASSHI8BIT)
-		    c_p = (char)unsign_c;
-		if (unsign_c > 127 &&
-		    me->T.output_utf8 &&
-		    *me->utf_buf) {
+		/*  Either output as UTF8 or a hex representation or
+		**  pass the raw character and hope it's OK.
+		*/
+		if (code <= 255 && !PASSHI8BIT)
+		    c_p = FROMASCII((char)code);
+		if (code > 127 && me->T.output_utf8 && *me->utf_buf) {
 		    HText_appendText(me->text, me->utf_buf);
 		    me->utf_buf_p = me->utf_buf;
 		    *(me->utf_buf_p) = '\0';
@@ -503,11 +540,10 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l)
 			   (unsigned char)c_p >=
 			             LYlowest_eightbit[me->htext_char_set]) {
 		    HText_appendCharacter(me->text, c_p);
-		} else if (me->T.trans_from_uni && unsign_c >= 127) {
-		    sprintf(replace_buf,"U%.2lx",unsign_c);
+		} else if (me->T.trans_from_uni && code >= 127) {
+		    sprintf(replace_buf, "U%.2lX", code);
 		    HText_appendText(me->text, replace_buf);
 		} else
-#endif /* EXP_CHARTRANS */
 	        HText_appendCharacter(me->text, c_p);
 	    }
 	}
@@ -541,7 +577,7 @@ PRIVATE void HTPlain_abort ARGS2(
 */
 PUBLIC CONST HTStreamClass HTPlain =
 {		
-	"SocketWriter",
+	"PlainPresenter",
 	HTPlain_free,
 	HTPlain_abort,
 	HTPlain_put_character, 	HTPlain_put_string, HTPlain_write,
@@ -566,7 +602,7 @@ PUBLIC HTStream* HTPlainPresent ARGS3(
 #ifdef EXP_CHARTRANS
     me->utf_count = 0;
     me->utf_char = 0;
-    me->utf_buf[0] = me->utf_buf[6] = '\0';
+    me->utf_buf[0] = me->utf_buf[6] =me->utf_buf[7] = '\0';
     me->utf_buf_p = me->utf_buf;
     me->htext_char_set =
 		      HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT);
diff --git a/WWW/Library/Implementation/HTString.c b/WWW/Library/Implementation/HTString.c
index 3f3c9f4b..2ffa3e51 100644
--- a/WWW/Library/Implementation/HTString.c
+++ b/WWW/Library/Implementation/HTString.c
@@ -180,8 +180,11 @@ PUBLIC char * HTNextField ARGS1(
 **      found   points to the delimiter found unless it was NULL.
 **	Returns	a pointer to the first word or NULL on error
 */
-PUBLIC char * HTNextTok (char ** pstr,
-		      const char * delims, const char * bracks, char * found)
+PUBLIC char * HTNextTok ARGS4(
+	char **,	pstr,
+	const char *,	delims,
+	const char *,	bracks,
+	char *,		found)
 {
     char * p = *pstr;
     char * start = NULL;
diff --git a/WWW/Library/Implementation/HTString.h b/WWW/Library/Implementation/HTString.h
index ffc79e64..fb2d3397 100644
--- a/WWW/Library/Implementation/HTString.h
+++ b/WWW/Library/Implementation/HTString.h
@@ -43,8 +43,8 @@ Next word or quoted string
 extern char * HTNextField PARAMS ((char** pstr));
 
 /* A more general parser - kw */
-extern char * HTNextTok (char ** pstr,
-		      const char * delims, const char * bracks, char * found);
+extern char * HTNextTok PARAMS((char ** pstr,
+		      const char * delims, const char * bracks, char * found));
 
 #endif
 /*
diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c
index 2fc9145b..595f39bd 100644
--- a/WWW/Library/Implementation/HTTP.c
+++ b/WWW/Library/Implementation/HTTP.c
@@ -364,10 +364,20 @@ try_again:
       }
 
       if (!(LYUserSpecifiedURL ||
-      	    LYNoRefererHeader || LYNoRefererForThis) &&
-         strcmp((char *)HTLoadedDocumentURL(), "")) {
+	    LYNoRefererHeader || LYNoRefererForThis) &&
+	  strcmp(HTLoadedDocumentURL(), "")) {
+	  char *cp = HTLoadedDocumentURL();
           StrAllocCat(command, "Referer: ");
-          StrAllocCat(command, (char *)HTLoadedDocumentURL());
+	  if (!strncasecomp(cp, "LYNXIMGMAP:", 11)) {
+	      char *cp1 = strchr(cp, '#');
+	      if (cp1)
+		  *cp1 = '\0';
+	      StrAllocCat(command, cp + 11);
+	      if (cp1)
+		  *cp1 = '#';
+	  } else {
+	      StrAllocCat(command, cp);
+	  }
           sprintf(line, "%c%c", CR, LF);
           StrAllocCat(command, line);
       }
@@ -653,7 +663,7 @@ try_again:
     BOOL end_of_file = NO;
     int buffer_length = INIT_LINE_SIZE;
 
-    line_buffer = (char *) calloc(1, buffer_length * sizeof(char));
+    line_buffer = (char *)calloc(1, (buffer_length * sizeof(char)));
 
     do {/* Loop to read in the first line */
         /*
@@ -662,7 +672,7 @@ try_again:
         if (buffer_length - length < LINE_EXTEND_THRESH) {
             buffer_length = buffer_length + buffer_length;
             line_buffer = 
-              (char *) realloc(line_buffer, buffer_length * sizeof(char));
+              (char *)realloc(line_buffer, (buffer_length * sizeof(char)));
         }
         if (TRACE)
             fprintf (stderr, "HTTP: Trying to read %d\n",
@@ -672,8 +682,10 @@ try_again:
         if (TRACE)
             fprintf (stderr, "HTTP: Read %d\n", status);
         if (status <= 0) {
-            /* Retry if we get nothing back too; 
-               bomb out if we get nothing twice. */
+            /*
+	     *  Retry if we get nothing back too.
+             *  Bomb out if we get nothing twice.
+	     */
             if (status == HT_INTERRUPTED) {
                 if (TRACE)
                     fprintf (stderr, "HTTP: Interrupted initial read.\n");
@@ -728,7 +740,7 @@ try_again:
 
         if (line_buffer) {
             FREE(line_kept_clean);
-            line_kept_clean = (char *)malloc (buffer_length * sizeof (char));
+            line_kept_clean = (char *)malloc(buffer_length * sizeof(char));
             memcpy(line_kept_clean, line_buffer, buffer_length);
         }
 
@@ -924,8 +936,9 @@ try_again:
 		 *  No Content.
 		 */
 	        HTAlert(line_buffer);
+                HTTP_NETCLOSE(s, handle);
 	        status = HT_NO_DATA;
-	        goto done;
+	        goto clean_up;
 		break;
 
 	      case 205:
@@ -937,8 +950,9 @@ try_again:
 		 *  document. - FM
 		 */
 	        HTAlert("Request fulfilled.  Reset Content.");
+                HTTP_NETCLOSE(s, handle);
 	        status = HT_NO_DATA;
-	        goto done;
+	        goto clean_up;
 		break;
 
 	      case 206:
@@ -951,7 +965,7 @@ try_again:
 	        HTAlert(line_buffer);
                 HTTP_NETCLOSE(s, handle);
 	        status = HT_NO_DATA;
-	        goto done;
+	        goto clean_up;
 		break;
 
 	      default:
diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c
index 6e64714f..4af1fab9 100644
--- a/WWW/Library/Implementation/SGML.c
+++ b/WWW/Library/Implementation/SGML.c
@@ -44,6 +44,8 @@ PUBLIC BOOL HTPassEightBitNum = FALSE;	/* Pass ^ numeric entities raw.	*/
 PUBLIC BOOL HTPassHighCtrlRaw = FALSE;	/* Pass 127-160,173,&#127; raw.	*/
 PUBLIC BOOL HTPassHighCtrlNum = FALSE;	/* Pass &#128;-&#159; raw.	*/
 
+extern UCode_t HTMLGetEntityUCValue PARAMS((CONST char *name));
+extern int LYlowest_eightbit[];
 
 /*	The State (context) of the parser
 **
@@ -79,6 +81,7 @@ struct _HTStream {
 
     HTTag 			*current_tag;
     CONST HTTag 		*unknown_tag;
+    BOOL			inSELECT;
     int 			current_attribute_number;
     HTChunk			*string;
     HTElement			*element_stack;
@@ -109,7 +112,6 @@ struct _HTStream {
     BOOL			second_bracket;
     BOOL			isHex;
 
-#ifdef EXP_CHARTRANS
     HTParentAnchor *		node_anchor;
     LYUCcharset	*		UCI;		/* pointer to anchor UCInfo */
     int				in_char_set;	/* charset we are fed	    */
@@ -121,7 +123,6 @@ struct _HTStream {
     char *			utf_buf_p;
     UCTransParams		T;
     int			current_tag_charset; /* charset to pass attributes */
-#endif /* EXP_CHARTRANS */
 
     char *			recover;
     int				recover_index;
@@ -168,7 +169,8 @@ PRIVATE void set_chartrans_handling ARGS3(
     } else if (context->T.do_8bitraw ||
 	       context->T.use_raw_char_in) {
 	context->current_tag_charset = context->in_char_set;
-    } else if (context->T.trans_from_uni || context->T.output_utf8) {
+    } else if (context->T.output_utf8 ||
+    	       context->T.trans_from_uni) {
 	context->current_tag_charset = UCGetLYhndl_byMIME("unicode-1-1-utf-8");
     } else {
 	context->current_tag_charset = 0;
@@ -321,6 +323,23 @@ PRIVATE BOOL put_special_unicodes ARGS2(
 	**  Use ASCII hyphen for ndash/endash or mdash/emdash.
 	*/
 	PUTC('-');
+#ifdef NOTUSED_FOTEMODS
+    } else if (code == 8204 || code == 8205) {
+	/*
+	**  Ignore zwnj or zwj, for now.  Note that zwnj may have
+	**  been handled as <WBR> by the calling function. - FM
+	*/
+	if (TRACE) {
+	    fprintf(stderr, "put_special_unicodes: Ignoring '%ld'.\n", code);
+	}
+    } else if (code == 8206 || code == 8207) {
+	/*
+	**  Ignore lrm or rlm, for now.
+	*/
+	if (TRACE) {
+	    fprintf(stderr, "put_special_unicodes: Ignoring '%ld'.\n", code);
+	}
+#endif /* NOTUSED_FOTEMODS */
     } else {
 	/*
 	**  Return NO if nothing done.
@@ -348,28 +367,31 @@ PRIVATE BOOL put_special_unicodes ARGS2(
 **
 ** Modified more (for use with Lynx character translation code):
 */
-
-#ifdef EXP_CHARTRANS
 PRIVATE char replace_buf [64];        /* buffer for replacement strings */
-#endif
-
 PRIVATE BOOL FoundEntity = FALSE;
 
+#define IncludesLatin1Enc(cs) \
+		(cs == 0 || \
+		 (context->htmlUCI && \
+		  (context->htmlUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))
+
 PRIVATE void handle_entity ARGS2(
 	HTStream *,	context,
 	char,		term)
 {
     CONST char ** entities = context->dtd->entity_names;
+#ifdef NOTDEFINED
     CONST UC_entity_info * extra_entities = context->dtd->extra_entity_info;
-    extern int current_char_set;
     int rc;
+#endif /* NOTDEFINED */
+    UCode_t code;
+    long uck;
     CONST char *s = context->string->data;
     int high, low, i, diff;
 
     /*
-    **  Use Lynx special characters directly for nbsp, ensp, emsp,
-    **  thinsp, and shy so we go through the HTML_put_character()
-    **  filters instead of using HTML_put_string(). - FM
+    **  Use Lynx special characters for nbsp (160), ensp (8194),
+    **  emsp (8195), thinsp (8201), and shy (173). - FM
     */
     if (!strcmp(s, "nbsp")) {
         PUTC(HT_NON_BREAK_SPACE);
@@ -387,13 +409,122 @@ PRIVATE void handle_entity ARGS2(
 	return;
     }
 
+#ifdef NOTUSED_FOTEMODS
+    /*
+    **  For ndash or endash (8211), and mdash or emdash (8212),
+    **  use an ASCII hyphen (45). - FM
+    */
+    if (!strcmp(s, "ndash") ||
+	!strcmp(s, "endash") ||
+	!strcmp(s, "mdash") ||
+	!strcmp(s, "endash")) {
+	PUTC('-');
+	FoundEntity = TRUE;
+	return;
+    }
+
+    /*
+    **  Ignore zwnj (8204) and zwj (8205), for now.
+    **  Note that zwnj may have been handled as <WBR>
+    **  by the calling function. - FM
+    */
+    if (!strcmp(s, "zwnj") ||
+	!strcmp(s, "zwnj")) {
+	if (TRACE) {
+	    fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s);
+	}
+	FoundEntity = TRUE;
+	return;
+    }
+
+    /*
+    **  Ignore lrm (8206), and rlm (8207), for now. - FM
+    */
+    if (!strcmp(s, "lrm") ||
+	!strcmp(s, "rlm")) {
+	if (TRACE) {
+	    fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s);
+	}
+	FoundEntity = TRUE;
+	return;
+    }
+#endif /* NOTUSED_FOTEMODS */
+
     /*
     **  Handle all other entities normally. - FM
     */
     FoundEntity = FALSE;
+    if ((code = HTMLGetEntityUCValue(s)) != 0) {
+        /*
+	**  We got a Unicode value for the entity name.
+	**  Check for special Unicodes. - FM
+	*/
+	if (put_special_unicodes(context, code)) {  
+	    FoundEntity = TRUE;
+	    return;
+	}
+	/*
+	**  Seek a translation from the chartrans tables.
+	*/
+	if ((uck = UCTransUniChar(code, context->html_char_set)) >= 32 &&
+	    uck < 256 &&
+	    (uck < 127 ||
+	     uck >= LYlowest_eightbit[context->html_char_set])) {
+	    if (uck == 160 && IncludesLatin1Enc(context->html_char_set)) {
+		/*
+		**  Would only happen if some other Unicode
+		**  is mapped to Latin-1 160.
+		*/
+		PUTC(HT_NON_BREAK_SPACE);
+	    } else if (uck == 173 && IncludesLatin1Enc(context->html_char_set)) {
+		/*
+		**  Would only happen if some other Unicode
+		**  is mapped to Latin-1 173.
+		*/
+		PUTC(LY_SOFT_HYPHEN);
+	    } else {
+		PUTC(FROMASCII((char)uck));
+	    }
+	    FoundEntity = TRUE;
+	    return;
+	} else if ((uck == -4 ||
+		    (context->T.repl_translated_C0 &&
+		     uck > 0 && uck < 32)) &&
+		   /*
+		   **  Not found; look for replacement string.
+		   */
+		   (uck = UCTransUniCharStr(replace_buf, 60, code,
+					    context->html_char_set, 0) >= 0)) {
+	    CONST char *p;
+	    for (p = replace_buf; *p; p++)
+		PUTC(*p);
+	    FoundEntity = TRUE;
+	    return;
+	}
+	/*
+	**  If we're displaying UTF-8, try that now. - FM
+	*/
+	if (context->T.output_utf8 && PUTUTF8(code)) {
+	    FoundEntity = TRUE;
+	    return;
+	}
+	/*
+	**  If it's safe ASCII, use it. - FM
+	*/
+        if (code >= 32 && code < 127) {
+	    PUTC(FROMASCII((char)code));
+	    FoundEntity = TRUE;
+	    return;
+	}
+    }
+
+    /*
+    **  We haven't succeeded yet, so try the old LYCharSets
+    **  arrays for translation strings. - FM
+    */
     for (low = 0, high = context->dtd->number_of_entities;
     	 high > low;
-	 diff < 0 ? (low = i+1) : (high = i)) {  /* Binary serach */
+	 diff < 0 ? (low = i+1) : (high = i)) {  /* Binary search */
 	i = (low + (high-low)/2);
 	diff = strcmp(entities[i], s);	/* Case sensitive! */
 	if (diff == 0) {		/* success: found it */
@@ -402,38 +533,43 @@ PRIVATE void handle_entity ARGS2(
 	    return;
 	}
     }
-#ifdef EXP_CHARTRANS
+
+#ifdef NOTDEFINED
     /* repeat for extra entities if not found... hack... -kw */
     if (TRACE)
 	fprintf(stderr,
 		"SGML: Unknown entity %s so far, checking extra...\n", s); 
     for (low = 0, high = context->dtd->number_of_extra_entities;
     	 high > low;
-	 diff < 0 ? (low = i+1) : (high = i)) {  /* Binary serach */
-	i = (low + (high-low)/2);
-	diff = strcmp(extra_entities[i].name, s);	/* Case sensitive! */
-	if (diff==0) {			/* success: found it */
-	  if (put_special_unicodes(context, extra_entities[i].code)) {  
-	    FoundEntity = TRUE;
-	    return;
-	  } else if (context->T.output_utf8 &&
-		     PUTUTF8(extra_entities[i].code)) {
-	    FoundEntity = TRUE;
-	    return;
-	  }
+	 diff < 0 ? (low = i+1) : (high = i)) {   /* Binary search */
+	i = (low + (high - low)/2);
+	diff = strcmp(extra_entities[i].name, s); /* Case sensitive! */
+	if (diff == 0) {		/* success: found it */
+	    if (put_special_unicodes(context, extra_entities[i].code)) {  
+		FoundEntity = TRUE;
+		return;
+	    } else if (context->T.output_utf8 &&
+		       PUTUTF8(extra_entities[i].code)) {
+		FoundEntity = TRUE;
+		return;
+	    }
 	    if ((rc = UCTransUniChar(extra_entities[i].code,
-				     current_char_set)) > 0) {
+				     context->html_char_set)) > 0 &&
+		rc < 256) {
 		/*
-		 *  Could do further checks here... - KW
-		 */
-	    PUTC(rc);
-	    FoundEntity = TRUE;
-	    return;
+		**  Could do further checks here... - KW
+		*/
+		PUTC(FROMASCII((char)rc));
+		FoundEntity = TRUE;
+		return;
 	    } else if ((rc == -4) &&
-		       /* Not found; look for replacement string */
+		       /*
+		       **  Not found; look for replacement string.
+		       */
 		       (rc = UCTransUniCharStr(replace_buf, 60,
 					       extra_entities[i].code,
-					       current_char_set, 0) >= 0)) {
+					       context->html_char_set,
+					       0) >= 0)) {
 		CONST char *p;
 		for (p = replace_buf; *p; p++)
 		    PUTC(*p);
@@ -442,13 +578,14 @@ PRIVATE void handle_entity ARGS2(
 	    } 
 	    rc = (*context->actions->put_entity)(context->target,
 					  i+context->dtd->number_of_entities);
-	  if (rc != HT_CANNOT_TRANSLATE) {
-	      FoundEntity = TRUE;
-	      return;
-	  }
+	    if (rc != HT_CANNOT_TRANSLATE) {
+		FoundEntity = TRUE;
+		return;
+	    }
 	}
     }
-#endif
+#endif /* NOTDEFINED */
+
     /*
     **  If entity string not found, display as text.
     */
@@ -625,6 +762,9 @@ PRIVATE void do_close_stacked ARGS1(
     HTElement * stacked = context->element_stack;
     if (!stacked)
 	return;			/* stack was empty */
+    if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) {
+	context->inSELECT = FALSE;
+    }
     (*context->actions->end_element)(
         context->target,
         stacked->tag - context->dtd->tags,
@@ -635,8 +775,8 @@ PRIVATE void do_close_stacked ARGS1(
 PRIVATE int is_on_stack ARGS2(
 	HTStream *,	context,
 	HTTag *,	old_tag)
-{
-    HTElement * stacked = context->element_stack;
+{ 
+   HTElement * stacked = context->element_stack;
     int i = 1;
     for (; stacked; stacked = stacked->next, i++) {
 	if (stacked->tag == old_tag)
@@ -700,10 +840,36 @@ PRIVATE void end_element ARGS2(
 	    return;
 	}
     }
-    /* Now let the old code deal with the rest... - kw */
+    /* Now let the non-extended code deal with the rest. - kw */
 
 #endif /* EXTENDED_HTMLDTD */
 
+    /*
+    **  If we are in a SELECT block, ignore anything
+    **  but a SELECT end tag. - FM
+    */
+    if (context->inSELECT) {
+	if (!strcasecomp(old_tag->name, "SELECT")) {
+	    /*
+	    **  Turn off the inSELECT flag and fall through. - FM
+	    */
+	    context->inSELECT = FALSE;
+	} else {
+	    /*
+	    **  Ignore the end tag. - FM
+	    */
+	    if (TRACE) {
+		fprintf(stderr,
+			"SGML: Ignoring end tag </%s> in SELECT block.\n",
+	    		old_tag->name);
+	    }
+	    return;
+	}
+    }
+
+    /*
+    **  Handle the end tag. - FM
+    */
     if (TRACE)
         fprintf(stderr, "SGML: End </%s>\n", old_tag->name);
     if (old_tag->contents == SGML_EMPTY) {
@@ -832,10 +998,67 @@ PRIVATE void start_element ARGS1(
 			new_tag->name);
 	}
     }
-    /* fall through to the old code - kw */
+    /* Fall through to the non-extended code - kw */
 
 #endif /* EXTENDED_HTMLDTD */
 
+    /*
+    **  If we are not in a SELECT block, check if this is
+    **  a SELECT start tag.  Otherwise (i.e., we are in a
+    **  SELECT block) accept only OPTION as valid, terminate
+    **  the SELECT block if it is any other form-related
+    **  element, and otherwise ignore it. - FM
+    */
+    if (!context->inSELECT) {
+        /*
+	**  We are not in a SELECT block, so check if this starts one. - FM
+	*/
+	if (!strcasecomp(new_tag->name, "SELECT")) {
+	    /*
+	    **  Set the inSELECT flag and fall through. - FM
+	    */
+	    context->inSELECT = TRUE;
+	}
+    } else {
+        /*
+	**  We are in a SELECT block. - FM
+	*/
+        if (strcasecomp(new_tag->name, "OPTION")) {
+	    /*
+	    **  Ugh, it is not an OPTION. - FM
+	    */
+	    if (!strcasecomp(new_tag->name, "INPUT") ||
+	        !strcasecomp(new_tag->name, "TEXTAREA") ||
+		!strcasecomp(new_tag->name, "SELECT") ||
+		!strcasecomp(new_tag->name, "BUTTON") ||
+		!strcasecomp(new_tag->name, "FIELDSET") ||
+		!strcasecomp(new_tag->name, "LABEL") ||
+		!strcasecomp(new_tag->name, "LEGEND") ||
+		!strcasecomp(new_tag->name, "FORM")) {
+		/*
+		**  It is another form-related start tag, so terminate
+		**  the current SELECT block and fall through. - FM
+		*/
+		if (TRACE)
+		    fprintf(stderr,
+		       "SGML: Faking SELECT end tag before <%s> start tag.\n",
+			    new_tag->name);
+		end_element(context, SGMLFindTag(context->dtd, "SELECT"));
+	    } else {
+	        /*
+		**  Ignore the start tag. - FM
+		*/
+		if (TRACE)
+		    fprintf(stderr,
+			  "SGML: Ignoring start tag <%s> in SELECT block.\n",
+			    new_tag->name);
+		return;
+	    }
+	}
+    }
+    /*
+    **  Handle the start tag. - FM
+    */
     if (TRACE)
         fprintf(stderr, "SGML: Start <%s>\n", new_tag->name);
     (*context->actions->start_element)(
@@ -1016,11 +1239,7 @@ PUBLIC void SGML_character ARGS2(
     CONST SGML_dtd *dtd	=	context->dtd;
     HTChunk	*string = 	context->string;
     CONST char * EntityName;
-    extern int current_char_set;
     extern CONST char * HTMLGetEntityName PARAMS((int i));
-
-#ifdef EXP_CHARTRANS
-    extern int LYlowest_eightbit[];
     char * p;
     BOOLEAN chk;	/* Helps (?) walk through all the else ifs... */
     UCode_t clong, uck;	/* Enough bits for UCS4 ... */
@@ -1033,12 +1252,6 @@ PUBLIC void SGML_character ARGS2(
     */
 #define unsign_c clong
 
-#else
-#define c c_in
-#define unsign_c (unsigned char)c
-#endif    
-
-#ifdef EXP_CHARTRANS
     c = c_in;
     clong = (unsigned char)c;	/* a.k.a. unsign_c */
 
@@ -1119,7 +1332,7 @@ PUBLIC void SGML_character ARGS2(
 	if (clong > 0) {
 	    saved_char_in = c;
 	    if (clong < 256) {
-		c = (char)clong;
+		c = FROMASCII((char)clong);
 	    }
 	}
 	goto top1;
@@ -1134,7 +1347,7 @@ PUBLIC void SGML_character ARGS2(
 	      (clong = UCTransToUni(c, context->in_char_set)) > 0))) {
 	    saved_char_in = c;
 	    if (clong < 256) {
-		c = (char)clong;
+		c = FROMASCII((char)clong);
 	    }
 	    goto top1;
 	} else {
@@ -1169,13 +1382,11 @@ PUBLIC void SGML_character ARGS2(
 	goto top0a;
     }
 
-    /* At this point we have either unsign_c a.k.a. clong in Unicode
-       (and c in latin1 if clong is in the latin1 range),
-       or unsign_c and c will have to be passed raw. */
-
-#endif /* EXP_CHARTRANS */
-
-
+    /*
+    **  At this point we have either unsign_c a.k.a. clong in
+    **  Unicode (and c in latin1 if clong is in the latin1 range),
+    **  or unsign_c and c will have to be passed raw. - KW
+    */
 top:
 #ifdef EXP_CHARTRANS
     saved_char_in = '\0';
@@ -1283,7 +1494,6 @@ top1:
 		   !(PASS8859SPECL || HTCJK != NOCJK)) {
             PUTC(LY_SOFT_HYPHEN);
 
-#ifdef EXP_CHARTRANS
 	} else if (context->T.use_raw_char_in && saved_char_in) {
 	    /*
 	    **  Only if the original character is still in saved_char_in,
@@ -1300,8 +1510,8 @@ top1:
 		   uck < 256) {
 	    if (TRACE) {
 		fprintf(stderr,
-			"UCTransUniChar returned 0x%lx:'%c'.\n",
-			uck, (char)uck);
+			"UCTransUniChar returned 0x%.2lX:'%c'.\n",
+			uck, FROMASCII((char)uck));
 	    }
 	    c = (char)(uck & 0xff);
 	    PUTC(c);
@@ -1325,8 +1535,6 @@ top1:
 	*/
 	} else if (context->T.output_utf8 && PUTUTF8(clong)) {
 	    ; /* do nothing more */
-#endif /* EXP_CHARTRANS */
-
 	/*
 	**  If it's any other (> 160) 8-bit chararcter, and
 	**  we have not set HTPassEightBitRaw nor HTCJK, nor
@@ -1337,14 +1545,10 @@ top1:
 #define PASSHI8BIT HTPassEightBitRaw
 #else
 #define PASSHI8BIT (HTPassEightBitRaw || (context->T.do_8bitraw && !context->T.trans_from_uni))
-#define IncludesLatin1Enc(cs) \
-		(cs == 0 || \
-		 (context->htmlUCI && \
-		  (context->htmlUCI->enc & (UCT_CP_SUPERSETOF_LAT1))))
 #endif /* EXP_CHARTRANS */
 	} else if (unsign_c > 160 && unsign_c < 256 &&
 		   !(PASSHI8BIT || HTCJK != NOCJK) &&
-		   !IncludesLatin1Enc(current_char_set)) {
+		   !IncludesLatin1Enc(context->html_char_set)) {
 	    int i;
 	    int value;
 
@@ -1514,7 +1718,7 @@ top1:
     **  Check for a numeric entity.
     */
     case S_cro:
-	if (unsign_c < 127 && (unsigned char)c == 'x') {
+	if (unsign_c < 127 && TOLOWER((unsigned char)c) == 'x') {
 	    context->isHex = TRUE;
 	    context->state = S_incro;
 	} else if (unsign_c < 127 && isdigit((unsigned char)c)) {
@@ -1628,17 +1832,20 @@ top1:
 		/*
 		 *  Seek a translation from the chartrans tables.
 		 */
-	      if ((uck = UCTransUniChar(value,current_char_set)) >= 32 &&
+		if ((uck = UCTransUniChar(value,
+					  context->html_char_set)) >= 32 &&
 		    uck < 256 &&
 		    (uck < 127 ||
 		     uck >= LYlowest_eightbit[context->html_char_set])) {
-		    if (uck == 160 && current_char_set == 0) {
+		    if (uck == 160 &&
+			IncludesLatin1Enc(context->html_char_set)) {
 			/*
 			**  Would only happen if some other Unicode
 			**  is mapped to Latin-1 160.
 			*/
 			PUTC(HT_NON_BREAK_SPACE);
-		    } else if (uck == 173 && current_char_set == 0) {
+		    } else if (uck == 173 &&
+			IncludesLatin1Enc(context->html_char_set)) {
 			/*
 			**  Would only happen if some other Unicode
 			**  is mapped to Latin-1 173.
@@ -1653,8 +1860,9 @@ top1:
 			   /*
 			   **  Not found; look for replacement string.
 			   */
-		(uck = UCTransUniCharStr(replace_buf,60,value,
-				      current_char_set, 0)   >= 0 ) ) { 
+			   (uck = UCTransUniCharStr(replace_buf, 60, value,
+						    context->html_char_set,
+						    0) >= 0)) { 
 		    for (p = replace_buf; *p; p++) {
 			PUTC(*p);
 		    }
@@ -1676,6 +1884,34 @@ top1:
 		    context->isHex = FALSE;
 		    context->state = S_entity;
 		    goto top1;
+#ifdef NOTUSED_FOTEMODS
+		/*
+		**  If the value is greater than 255 and we do not
+		**  have the "7-bit approximations" as our output
+		**  character set (in which case we did it already)
+		**  seek a translation for that. - FM
+		*/
+		} else if ((chk = ((code > 255) &&
+				   context->html_char_set !=
+				   UCGetLYhndl_byMIME("us-ascii"))) &&
+			   (uck = UCTransUniChar(code,
+				   UCGetLYhndl_byMIME("us-ascii")))
+				  >= 32 && uck < 127) {
+		    /*
+		    **  Got an ASCII character (yippey). - FM
+		    */
+		    PUTC(((char)(uck & 0xff)));
+		} else if ((chk && uck == -4) &&
+			   (uck = UCTransUniCharStr(replace_buf,
+						    60, code,
+						UCGetLYhndl_byMIME("us-ascii"),
+						    0) >= 0)) {
+		    /*
+		    **  Got a replacement string (yippey). - FM
+		    */
+		    for (p = replace_buf; *p; p++)
+			PUTC(*p);
+#endif /* NOTUSED_FOTEMODS */
 	        /*
 		**  Show the numeric entity if we get to here
 		**  and the value:
@@ -1690,13 +1926,13 @@ top1:
 		**  - FM
 		*/
 		} else if ((value > 255) ||
-		    (value < 32 &&
-		     value != 9 && value != 10 && value != 13 &&
-		     HTCJK == NOCJK) ||
-		    (value == 127 &&
-		     !(HTPassHighCtrlRaw || HTCJK != NOCJK)) ||
-		    (value > 127 && value < 160 &&
-		     !HTPassHighCtrlNum)) {
+			   (value < 32 &&
+			    value != 9 && value != 10 && value != 13 &&
+			    HTCJK == NOCJK) ||
+			   (value == 127 &&
+			    !(HTPassHighCtrlRaw || HTCJK != NOCJK)) ||
+			   (value > 127 && value < 160 &&
+			    !HTPassHighCtrlNum)) {
 		    if (value == 8194 || value == 8195 || value == 8201) {
 		        /*
 			**  ensp, emsp or thinsp. - FM
@@ -1728,7 +1964,7 @@ top1:
 			goto top1;
 		    }
 		} else if (value < 161 || HTPassEightBitNum ||
-			   IncludesLatin1Enc(current_char_set)) {
+			   IncludesLatin1Enc(context->html_char_set)) {
 		    /*
 		    **  No conversion needed. - FM
 		    */
@@ -2283,11 +2519,13 @@ top1:
 	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
 				      context->T.trans_from_uni)) {
 	    if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw &&
-		(unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set])
+		(unsigned char)saved_char_in >=
+		LYlowest_eightbit[context->html_char_set]) {
 		HTChunkPutUtf8Char(string,
 				   (0xf000 | (unsigned char)saved_char_in));
-	    else
+	    } else {
 		HTChunkPutUtf8Char(string, clong);
+	    }
 	} else if (saved_char_in && context->T.use_raw_char_in) {
 	    HTChunkPutc(string, saved_char_in);
 #endif /* EXP_CHARTRANS */
@@ -2318,11 +2556,13 @@ top1:
 	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
 				      context->T.trans_from_uni)) {
 	    if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw &&
-		(unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set])
+		(unsigned char)saved_char_in >=
+		LYlowest_eightbit[context->html_char_set]) {
 		HTChunkPutUtf8Char(string,
 				   (0xf000 | (unsigned char)saved_char_in));
-	    else
+	    } else {
 		HTChunkPutUtf8Char(string, clong);
+	    }
 	} else if (saved_char_in && context->T.use_raw_char_in) {
 	    HTChunkPutc(string, saved_char_in);
 #endif /* EXP_CHARTRANS */
@@ -2357,11 +2597,13 @@ top1:
 	} else if (HTCJK == NOCJK && (context->T.output_utf8 ||
 				      context->T.trans_from_uni)) {
 	    if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw &&
-		(unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set])
+		(unsigned char)saved_char_in >=
+		LYlowest_eightbit[context->html_char_set]) {
 		HTChunkPutUtf8Char(string,
 				   (0xf000 | (unsigned char)saved_char_in));
-	    else
+	    } else {
 		HTChunkPutUtf8Char(string, clong);
+	    }
 	} else if (saved_char_in && context->T.use_raw_char_in) {
 	    HTChunkPutc(string, saved_char_in);
 #endif /* EXP_CHARTRANS */
@@ -2425,34 +2667,6 @@ top1:
 		    }
 		    break;
 		} else if (tag_OK &&
-			   !strcasecomp(string->data, "P")) {
-		    /*
-		    **  Treat a P end tag like a P start tag (Ugh,
-		    **  what a hack! 8-). - FM
-		    */
-		    if (TRACE)
-		        fprintf(stderr,
-				"SGML: `</%s%c' found!  Treating as '<%s%c'.\n",
-				string->data, c, string->data, c);
-		    {
-		        int i;
-			for (i = 0;
-			     i < context->current_tag->number_of_attributes;
-			     i++) {
-			    context->present[i] = NO;
-			}
-		    }
-		    string->size = 0;
-		    context->current_attribute_number = INVALID;
-		    if (context->current_tag->name)
-			start_element(context);
-		    if (c != '>') {
-			context->state = S_junk_tag;
-		    } else {
-			context->state = S_text;
-		    }
-		    break;
-		} else if (tag_OK &&
 			   (!strcasecomp(string->data, "A") ||
 			    !strcasecomp(string->data, "B") ||
 			    !strcasecomp(string->data, "BLINK") ||
@@ -2461,6 +2675,7 @@ top1:
 			    !strcasecomp(string->data, "FONT") ||
 			    !strcasecomp(string->data, "FORM") ||
 			    !strcasecomp(string->data, "I") ||
+			    !strcasecomp(string->data, "P") ||
 			    !strcasecomp(string->data, "STRONG") ||
 			    !strcasecomp(string->data, "TT") ||
 			    !strcasecomp(string->data, "U"))) {
@@ -2471,12 +2686,69 @@ top1:
 		    **  with checks there to avoid throwing the HTML.c stack
 		    **  out of whack (Ugh, what a hack! 8-). - FM
 		    */
-		    if (TRACE)
-		        fprintf(stderr, "SGML: End </%s>\n", string->data);
-		    (*context->actions->end_element)
-			(context->target,
-			 (context->current_tag - context->dtd->tags),
-			 (char **)&context->include);
+		    if (context->inSELECT) {
+		        /*
+			**  We are in a SELECT block. - FM
+			*/
+			if (strcasecomp(string->data, "FORM")) {
+			    /*
+			    **  It is not a FORM end tag, so ignore it. - FM
+			    */
+			    if (TRACE) {
+				fprintf(stderr,
+			    "SGML: Ignoring end tag </%s> in SELECT block.\n",
+	    				string->data);
+			    }
+			} else {
+			    /*
+			    **  End the SELECT block and then
+			    **  handle the FORM end tag. - FM
+			    */
+			    if (TRACE) {
+				fprintf(stderr,
+			"SGML: Faking SELECT end tag before </%s> end tag.\n",
+					string->data);
+			    }
+			    end_element(context,
+			    		SGMLFindTag(context->dtd, "SELECT"));
+			    if (TRACE) {
+				fprintf(stderr,
+					"SGML: End </%s>\n", string->data);
+			    }
+			    (*context->actions->end_element)
+				(context->target,
+			 	 (context->current_tag - context->dtd->tags),
+				 (char **)&context->include);
+			}
+		    } else if (!strcasecomp(string->data, "P")) {
+			/*
+			**  Treat a P end tag like a P start tag (Ugh,
+			**  what a hack! 8-). - FM
+			*/
+			if (TRACE)
+			    fprintf(stderr,
+				    "SGML: `</%s%c' found!  Treating as '<%s%c'.\n",
+				    string->data, c, string->data, c);
+			{
+			    int i;
+			    for (i = 0;
+				 i < context->current_tag->number_of_attributes;
+				 i++) {
+				context->present[i] = NO;
+			    }
+			}
+			if (context->current_tag->name)
+			    start_element(context);
+		    } else {
+			if (TRACE) {
+			    fprintf(stderr,
+				    "SGML: End </%s>\n", string->data);
+			}
+			(*context->actions->end_element)
+			    (context->target,
+			     (context->current_tag - context->dtd->tags),
+			     (char **)&context->include);
+		    }
 		    string->size = 0;
 		    context->current_attribute_number = INVALID;
 		    if (c != '>') {
@@ -2764,6 +3036,7 @@ PUBLIC HTStream* SGML_new  ARGS3(
     context->unknown_tag = &HTTag_unrecognized;
     context->state = S_text;
     context->element_stack = 0;			/* empty */
+    context->inSELECT = FALSE;
 #ifdef CALLERDATA		  
     context->callerData = (void*) callerData;
 #endif /* CALLERDATA */
diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h
index 36c17050..5b2d52ce 100644
--- a/WWW/Library/Implementation/UCAux.h
+++ b/WWW/Library/Implementation/UCAux.h
@@ -20,7 +20,9 @@ typedef enum {
 extern UCTQ_t UCCanUniTranslateFrom PARAMS((int from));
 extern UCTQ_t UCCanTranslateUniTo PARAMS((int to));
 extern UCTQ_t UCCanTranslateFromTo PARAMS((int from, int to));
-extern BOOL UCNeedNotTranslate PARAMS((int from, int to));
+extern BOOL UCNeedNotTranslate PARAMS((
+	int		from,
+	int		to));
 
 struct _UCTransParams
 {
@@ -68,9 +70,17 @@ typedef void putc_func_t PARAMS((
 	HTStream *	me,
 	char		ch));
 
+#ifndef UCMAP_H
+#include "UCMap.h"
+#endif /* UCMAP_H */
+
 extern BOOL UCPutUtf8_charstring PARAMS((
 	HTStream *	target,
 	putc_func_t *	actions,
-	long	code));
+	UCode_t		code));
     
+extern BOOL UCConvertUniToUtf8 PARAMS((
+	UCode_t		code,
+	char *		buffer));
+
 #endif /* UCAUX_H */
diff --git a/WWW/Library/Implementation/UCMap.h b/WWW/Library/Implementation/UCMap.h
index 017ebc92..e634f760 100644
--- a/WWW/Library/Implementation/UCMap.h
+++ b/WWW/Library/Implementation/UCMap.h
@@ -17,7 +17,10 @@ extern int UCTransChar PARAMS((
 	char		ch_in,
 	int		charset_in,
 	int		charset_out));
-PUBLIC int UCReverseTransChar PARAMS((char ch_out, int charset_in, int charset_out));
+extern int UCReverseTransChar PARAMS((
+	char		ch_out,
+	int		charset_in,
+	int		charset_out));
 extern int UCTransCharStr PARAMS((
 	char *		outbuf,
 	int		buflen,
diff --git a/WWW/Library/unix/makefile.in b/WWW/Library/unix/makefile.in
index 14053794..f9fbb160 100644
--- a/WWW/Library/unix/makefile.in
+++ b/WWW/Library/unix/makefile.in
@@ -11,6 +11,7 @@ LFLAGS =
 
 prefix		= @prefix@
 exec_prefix	= @exec_prefix@
+top_srcdir	= @top_srcdir@
 srcdir		= @srcdir@
 VPATH		= $(srcdir)
 
@@ -28,7 +29,7 @@ LIBDIR = $(WWW)/Library/Implementation/$(WWW_MACH)
 
 #_________________ OK if normal W3 distribution
 # Where is the WWW source root?
-WWW = ../..
+WWW = $(top_srcdir)/WWW
 
 #  Where should temporary (object) files go?
 WTMP = ../..
s="w"> copyValue(y.node) of rkRegisterAddr: x.regAddr = y.regAddr of rkNodeAddr: x.nodeAddr = y.nodeAddr proc writeField(n: var PNode, x: TFullReg) = case x.kind of rkNone: discard of rkInt: n.intVal = x.intVal of rkFloat: n.floatVal = x.floatVal of rkNode: n = copyValue(x.node) of rkRegisterAddr: writeField(n, x.regAddr[]) of rkNodeAddr: n = x.nodeAddr[] proc putIntoReg(dest: var TFullReg; n: PNode) = case n.kind of nkStrLit..nkTripleStrLit: dest.kind = rkNode createStr(dest) dest.node.strVal = n.strVal of nkCharLit..nkUInt64Lit: dest.kind = rkInt dest.intVal = n.intVal of nkFloatLit..nkFloat128Lit: dest.kind = rkFloat dest.floatVal = n.floatVal else: dest.kind = rkNode dest.node = n proc regToNode(x: TFullReg): PNode = case x.kind of rkNone: result = newNode(nkEmpty) of rkInt: result = newNode(nkIntLit); result.intVal = x.intVal of rkFloat: result = newNode(nkFloatLit); result.floatVal = x.floatVal of rkNode: result = x.node of rkRegisterAddr: result = regToNode(x.regAddr[]) of rkNodeAddr: result = x.nodeAddr[] template getstr(a: untyped): untyped = (if a.kind == rkNode: a.node.strVal else: $chr(int(a.intVal))) proc pushSafePoint(f: PStackFrame; pc: int) = when not defined(nimNoNilSeqs): if f.safePoints.isNil: f.safePoints = @[] f.safePoints.add(pc) proc popSafePoint(f: PStackFrame) = # XXX this needs a proper fix! 
if f.safePoints.len > 0: discard f.safePoints.pop() proc cleanUpOnException(c: PCtx; tos: PStackFrame): tuple[pc: int, f: PStackFrame] = let raisedType = c.currentExceptionA.typ.skipTypes(abstractPtrs) var f = tos while true: while f.safePoints.len == 0: f = f.next if f.isNil: return (-1, nil) var pc2 = f.safePoints[f.safePoints.high] var nextExceptOrFinally = -1 if c.code[pc2].opcode == opcExcept: nextExceptOrFinally = pc2 + c.code[pc2].regBx - wordExcess inc pc2 while c.code[pc2].opcode == opcExcept: let excIndex = c.code[pc2].regBx-wordExcess let exceptType = if excIndex > 0: c.types[excIndex].skipTypes( abstractPtrs) else: nil #echo typeToString(exceptType), " ", typeToString(raisedType) if exceptType.isNil or inheritanceDiff(raisedType, exceptType) <= 0: # mark exception as handled but keep it in B for # the getCurrentException() builtin: c.currentExceptionB = c.currentExceptionA c.currentExceptionA = nil # execute the corresponding handler: while c.code[pc2].opcode == opcExcept: inc pc2 discard f.safePoints.pop return (pc2, f) inc pc2 if c.code[pc2].opcode != opcExcept and nextExceptOrFinally >= 0: # we're at the end of the *except list*, but maybe there is another # *except branch*? 
pc2 = nextExceptOrFinally+1 if c.code[pc2].opcode == opcExcept: nextExceptOrFinally = pc2 + c.code[pc2].regBx - wordExcess if nextExceptOrFinally >= 0: pc2 = nextExceptOrFinally if c.code[pc2].opcode == opcFinally: # execute the corresponding handler, but don't quit walking the stack: discard f.safePoints.pop return (pc2+1, f) # not the right one: discard f.safePoints.pop proc cleanUpOnReturn(c: PCtx; f: PStackFrame): int = for s in f.safePoints: var pc = s while c.code[pc].opcode == opcExcept: pc = pc + c.code[pc].regBx - wordExcess if c.code[pc].opcode == opcFinally: return pc return -1 proc opConv(c: PCtx; dest: var TFullReg, src: TFullReg, desttyp, srctyp: PType): bool = if desttyp.kind == tyString: if dest.kind != rkNode: myreset(dest) dest.kind = rkNode dest.node = newNode(nkStrLit) let styp = srctyp.skipTypes(abstractRange) case styp.kind of tyEnum: let n = styp.n let x = src.intVal.int if x <% n.len and (let f = n.sons[x].sym; f.position == x): dest.node.strVal = if f.ast.isNil: f.name.s else: f.ast.strVal else: for i in 0..<n.len: if n.sons[i].kind != nkSym: internalError(c.config, "opConv for enum") let f = n.sons[i].sym if f.position == x: dest.node.strVal = if f.ast.isNil: f.name.s else: f.ast.strVal return dest.node.strVal = styp.sym.name.s & " " & $x of tyInt..tyInt64: dest.node.strVal = $src.intVal of tyUInt..tyUInt64: dest.node.strVal = $uint64(src.intVal) of tyBool: dest.node.strVal = if src.intVal == 0: "false" else: "true" of tyFloat..tyFloat128: dest.node.strVal = $src.floatVal of tyString: dest.node.strVal = src.node.strVal of tyCString: if src.node.kind == nkBracket: # Array of chars var strVal = "" for son in src.node.sons: let c = char(son.intVal) if c == '\0': break strVal.add(c) dest.node.strVal = strVal else: dest.node.strVal = src.node.strVal of tyChar: dest.node.strVal = $chr(src.intVal) else: internalError(c.config, "cannot convert to string " & desttyp.typeToString) else: case skipTypes(desttyp, abstractRange).kind of tyInt..tyInt64: 
if dest.kind != rkInt: myreset(dest); dest.kind = rkInt case skipTypes(srctyp, abstractRange).kind of tyFloat..tyFloat64: dest.intVal = int(src.floatVal) else: dest.intVal = src.intVal if dest.intVal < firstOrd(c.config, desttyp) or dest.intVal > lastOrd(c.config, desttyp): return true of tyUInt..tyUInt64: if dest.kind != rkInt: myreset(dest); dest.kind = rkInt case skipTypes(srctyp, abstractRange).kind of tyFloat..tyFloat64: dest.intVal = int(src.floatVal) else: let srcDist = (sizeof(src.intVal) - srctyp.size) * 8 let destDist = (sizeof(dest.intVal) - desttyp.size) * 8 when system.cpuEndian == bigEndian: dest.intVal = (src.intVal shr srcDist) shl srcDist dest.intVal = (dest.intVal shr destDist) shl destDist else: dest.intVal = (src.intVal shl srcDist) shr srcDist dest.intVal = (dest.intVal shl destDist) shr destDist of tyFloat..tyFloat64: if dest.kind != rkFloat: myreset(dest); dest.kind = rkFloat case skipTypes(srctyp, abstractRange).kind of tyInt..tyInt64, tyUInt..tyUInt64, tyEnum, tyBool, tyChar: dest.floatVal = toBiggestFloat(src.intVal) else: dest.floatVal = src.floatVal else: asgnComplex(dest, src) proc compile(c: PCtx, s: PSym): int = result = vmgen.genProc(c, s) when debugEchoCode: c.echoCode result #c.echoCode template handleJmpBack() {.dirty.} = if c.loopIterations <= 0: if allowInfiniteLoops in c.features: c.loopIterations = MaxLoopIterations else: msgWriteln(c.config, "stack trace: (most recent call last)") stackTraceAux(c, tos, pc) globalError(c.config, c.debug[pc], errTooManyIterations) dec(c.loopIterations) proc recSetFlagIsRef(arg: PNode) = arg.flags.incl(nfIsRef) for i in 0 ..< arg.safeLen: arg.sons[i].recSetFlagIsRef proc setLenSeq(c: PCtx; node: PNode; newLen: int; info: TLineInfo) = let typ = node.typ.skipTypes(abstractInst+{tyRange}-{tyTypeDesc}) let oldLen = node.len setLen(node.sons, newLen) if oldLen < newLen: for i in oldLen ..< newLen: node.sons[i] = getNullValue(typ.sons[0], info, c.config) const errIndexOutOfBounds = "index out of 
bounds" errNilAccess = "attempt to access a nil address" errOverOrUnderflow = "over- or underflow" errConstantDivisionByZero = "division by zero" errIllegalConvFromXtoY = "illegal conversion from '$1' to '$2'" errTooManyIterations = "interpretation requires too many iterations; " & "if you are sure this is not a bug in your code edit " & "compiler/vmdef.MaxLoopIterations and rebuild the compiler" errFieldXNotFound = "node lacks field: " proc rawExecute(c: PCtx, start: int, tos: PStackFrame): TFullReg = var pc = start var tos = tos var regs: seq[TFullReg] # alias to tos.slots for performance move(regs, tos.slots) #echo "NEW RUN ------------------------" while true: #{.computedGoto.} let instr = c.code[pc] let ra = instr.regA #if c.traceActive: when traceCode: echo "PC ", pc, " ", c.code[pc].opcode, " ra ", ra, " rb ", instr.regB, " rc ", instr.regC # message(c.config, c.debug[pc], warnUser, "Trace") case instr.opcode of opcEof: return regs[ra] of opcRet: # XXX perform any cleanup actions pc = tos.comesFrom tos = tos.next let retVal = regs[0] if tos.isNil: #echo "RET ", retVal.rendertree return retVal move(regs, tos.slots) assert c.code[pc].opcode in {opcIndCall, opcIndCallAsgn} if c.code[pc].opcode == opcIndCallAsgn: regs[c.code[pc].regA] = retVal #echo "RET2 ", retVal.rendertree, " ", c.code[pc].regA of opcYldYoid: assert false of opcYldVal: assert false of opcAsgnInt: decodeB(rkInt) regs[ra].intVal = regs[rb].intVal of opcAsgnStr: decodeBC(rkNode) createStrKeepNode regs[ra], rc != 0 regs[ra].node.strVal = regs[rb].node.strVal of opcAsgnFloat: decodeB(rkFloat) regs[ra].floatVal = regs[rb].floatVal of opcAsgnIntFromFloat32: let rb = instr.regB ensureKind(rkInt) regs[ra].intVal = cast[int32](float32(regs[rb].floatVal)) of opcAsgnIntFromFloat64: let rb = instr.regB ensureKind(rkInt) regs[ra].intVal = cast[int64](regs[rb].floatVal) of opcAsgnFloat32FromInt: let rb = instr.regB ensureKind(rkFloat) regs[ra].floatVal = cast[float32](int32(regs[rb].intVal)) of 
opcAsgnFloat64FromInt: let rb = instr.regB ensureKind(rkFloat) regs[ra].floatVal = cast[float64](int64(regs[rb].intVal)) of opcAsgnComplex: asgnComplex(regs[ra], regs[instr.regB]) of opcAsgnRef: asgnRef(regs[ra], regs[instr.regB]) of opcNodeToReg: let ra = instr.regA let rb = instr.regB # opcDeref might already have loaded it into a register. XXX Let's hope # this is still correct this way: if regs[rb].kind != rkNode: regs[ra] = regs[rb] else: assert regs[rb].kind == rkNode let nb = regs[rb].node case nb.kind of nkCharLit..nkUInt64Lit: ensureKind(rkInt) regs[ra].intVal = nb.intVal of nkFloatLit..nkFloat64Lit: ensureKind(rkFloat) regs[ra].floatVal = nb.floatVal else: ensureKind(rkNode) regs[ra].node = nb of opcLdArr: # a = b[c] decodeBC(rkNode) if regs[rc].intVal > high(int): stackTrace(c, tos, pc, errIndexOutOfBounds) let idx = regs[rc].intVal.int let src = regs[rb].node if src.kind in {nkStrLit..nkTripleStrLit}: if idx <% src.strVal.len: regs[ra].node = newNodeI(nkCharLit, c.debug[pc]) regs[ra].node.intVal = src.strVal[idx].ord else: stackTrace(c, tos, pc, errIndexOutOfBounds) elif src.kind notin {nkEmpty..nkFloat128Lit} and idx <% src.len: regs[ra].node = src.sons[idx] else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcLdStrIdx: decodeBC(rkInt) let idx = regs[rc].intVal.int let s = regs[rb].node.strVal if idx <% s.len: regs[ra].intVal = s[idx].ord elif idx == s.len and optLaxStrings in c.config.options: regs[ra].intVal = 0 else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcWrArr: # a[b] = c decodeBC(rkNode) let idx = regs[rb].intVal.int let arr = regs[ra].node if arr.kind in {nkStrLit..nkTripleStrLit}: if idx <% arr.strVal.len: arr.strVal[idx] = chr(regs[rc].intVal) else: stackTrace(c, tos, pc, errIndexOutOfBounds) elif idx <% arr.len: writeField(arr.sons[idx], regs[rc]) else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcLdObj: # a = b.c decodeBC(rkNode) let src = regs[rb].node case src.kind of nkEmpty..nkNilLit: stackTrace(c, tos, pc, 
errNilAccess) of nkObjConstr: let n = src.sons[rc + 1].skipColon regs[ra].node = n else: let n = src.sons[rc] regs[ra].node = n of opcWrObj: # a.b = c decodeBC(rkNode) let shiftedRb = rb + ord(regs[ra].node.kind == nkObjConstr) let dest = regs[ra].node if dest.kind == nkNilLit: stackTrace(c, tos, pc, errNilAccess) elif dest.sons[shiftedRb].kind == nkExprColonExpr: writeField(dest.sons[shiftedRb].sons[1], regs[rc]) else: writeField(dest.sons[shiftedRb], regs[rc]) of opcWrStrIdx: decodeBC(rkNode) let idx = regs[rb].intVal.int if idx <% regs[ra].node.strVal.len: regs[ra].node.strVal[idx] = chr(regs[rc].intVal) else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcAddrReg: decodeB(rkRegisterAddr) regs[ra].regAddr = addr(regs[rb]) of opcAddrNode: decodeB(rkNodeAddr) if regs[rb].kind == rkNode: regs[ra].nodeAddr = addr(regs[rb].node) else: stackTrace(c, tos, pc, "limited VM support for 'addr'") of opcLdDeref: # a = b[] let ra = instr.regA let rb = instr.regB case regs[rb].kind of rkNodeAddr: ensureKind(rkNode) regs[ra].node = regs[rb].nodeAddr[] of rkRegisterAddr: ensureKind(regs[rb].regAddr.kind) regs[ra] = regs[rb].regAddr[] of rkNode: if regs[rb].node.kind == nkNilLit: stackTrace(c, tos, pc, errNilAccess) if regs[rb].node.kind == nkRefTy: regs[ra].node = regs[rb].node.sons[0] else: ensureKind(rkNode) regs[ra].node = regs[rb].node else: stackTrace(c, tos, pc, errNilAccess) of opcWrDeref: # a[] = c; b unused let ra = instr.regA let rc = instr.regC case regs[ra].kind of rkNodeAddr: let n = regs[rc].regToNode # `var object` parameters are sent as rkNodeAddr. When they are mutated # vmgen generates opcWrDeref, which means that we must dereference # twice. # TODO: This should likely be handled differently in vmgen. 
if (nfIsRef notin regs[ra].nodeAddr[].flags and nfIsRef notin n.flags): regs[ra].nodeAddr[][] = n[] else: regs[ra].nodeAddr[] = n of rkRegisterAddr: regs[ra].regAddr[] = regs[rc] of rkNode: if regs[ra].node.kind == nkNilLit: stackTrace(c, tos, pc, errNilAccess) assert nfIsRef in regs[ra].node.flags regs[ra].node[] = regs[rc].regToNode[] regs[ra].node.flags.incl nfIsRef else: stackTrace(c, tos, pc, errNilAccess) of opcAddInt: decodeBC(rkInt) let bVal = regs[rb].intVal cVal = regs[rc].intVal sum = bVal +% cVal if (sum xor bVal) >= 0 or (sum xor cVal) >= 0: regs[ra].intVal = sum else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcAddImmInt: decodeBImm(rkInt) #message(c.config, c.debug[pc], warnUser, "came here") #debug regs[rb].node let bVal = regs[rb].intVal cVal = imm sum = bVal +% cVal if (sum xor bVal) >= 0 or (sum xor cVal) >= 0: regs[ra].intVal = sum else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcSubInt: decodeBC(rkInt) let bVal = regs[rb].intVal cVal = regs[rc].intVal diff = bVal -% cVal if (diff xor bVal) >= 0 or (diff xor not cVal) >= 0: regs[ra].intVal = diff else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcSubImmInt: decodeBImm(rkInt) let bVal = regs[rb].intVal cVal = imm diff = bVal -% cVal if (diff xor bVal) >= 0 or (diff xor not cVal) >= 0: regs[ra].intVal = diff else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcLenSeq: decodeBImm(rkInt) #assert regs[rb].kind == nkBracket let high = (imm and 1) # discard flags if (imm and nimNodeFlag) != 0: # used by mNLen (NimNode.len) regs[ra].intVal = regs[rb].node.safeLen - high else: # safeArrLen also return string node len # used when string is passed as openArray in VM regs[ra].intVal = regs[rb].node.safeArrLen - high of opcLenStr: decodeBImm(rkInt) assert regs[rb].kind == rkNode regs[ra].intVal = regs[rb].node.strVal.len - imm of opcIncl: decodeB(rkNode) let b = regs[rb].regToNode if not inSet(regs[ra].node, b): addSon(regs[ra].node, copyTree(b)) of opcInclRange: decodeBC(rkNode) var r = 
newNode(nkRange) r.add regs[rb].regToNode r.add regs[rc].regToNode addSon(regs[ra].node, r.copyTree) of opcExcl: decodeB(rkNode) var b = newNodeIT(nkCurly, regs[ra].node.info, regs[ra].node.typ) addSon(b, regs[rb].regToNode) var r = diffSets(c.config, regs[ra].node, b) discardSons(regs[ra].node) for i in countup(0, sonsLen(r) - 1): addSon(regs[ra].node, r.sons[i]) of opcCard: decodeB(rkInt) regs[ra].intVal = nimsets.cardSet(c.config, regs[rb].node) of opcMulInt: decodeBC(rkInt) let bVal = regs[rb].intVal cVal = regs[rc].intVal product = bVal *% cVal floatProd = toBiggestFloat(bVal) * toBiggestFloat(cVal) resAsFloat = toBiggestFloat(product) if resAsFloat == floatProd: regs[ra].intVal = product elif 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd): regs[ra].intVal = product else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcDivInt: decodeBC(rkInt) if regs[rc].intVal == 0: stackTrace(c, tos, pc, errConstantDivisionByZero) else: regs[ra].intVal = regs[rb].intVal div regs[rc].intVal of opcModInt: decodeBC(rkInt) if regs[rc].intVal == 0: stackTrace(c, tos, pc, errConstantDivisionByZero) else: regs[ra].intVal = regs[rb].intVal mod regs[rc].intVal of opcAddFloat: decodeBC(rkFloat) regs[ra].floatVal = regs[rb].floatVal + regs[rc].floatVal of opcSubFloat: decodeBC(rkFloat) regs[ra].floatVal = regs[rb].floatVal - regs[rc].floatVal of opcMulFloat: decodeBC(rkFloat) regs[ra].floatVal = regs[rb].floatVal * regs[rc].floatVal of opcDivFloat: decodeBC(rkFloat) regs[ra].floatVal = regs[rb].floatVal / regs[rc].floatVal of opcShrInt: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal shr regs[rc].intVal of opcShlInt: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal shl regs[rc].intVal of opcAshrInt: decodeBC(rkInt) regs[ra].intVal = ashr(regs[rb].intVal, regs[rc].intVal) of opcBitandInt: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal and regs[rc].intVal of opcBitorInt: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal or regs[rc].intVal of opcBitxorInt: decodeBC(rkInt) 
regs[ra].intVal = regs[rb].intVal xor regs[rc].intVal of opcAddu: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal +% regs[rc].intVal of opcSubu: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal -% regs[rc].intVal of opcMulu: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal *% regs[rc].intVal of opcDivu: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal /% regs[rc].intVal of opcModu: decodeBC(rkInt) regs[ra].intVal = regs[rb].intVal %% regs[rc].intVal of opcEqInt: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal == regs[rc].intVal) of opcLeInt: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal <= regs[rc].intVal) of opcLtInt: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal < regs[rc].intVal) of opcEqFloat: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].floatVal == regs[rc].floatVal) of opcLeFloat: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].floatVal <= regs[rc].floatVal) of opcLtFloat: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].floatVal < regs[rc].floatVal) of opcLeu: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal <=% regs[rc].intVal) of opcLtu: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal <% regs[rc].intVal) of opcEqRef: decodeBC(rkInt) if regs[rb].kind == rkNodeAddr: if regs[rc].kind == rkNodeAddr: regs[ra].intVal = ord(regs[rb].nodeAddr == regs[rc].nodeAddr) else: assert regs[rc].kind == rkNode # we know these cannot be equal regs[ra].intVal = ord(false) elif regs[rc].kind == rkNodeAddr: assert regs[rb].kind == rkNode # we know these cannot be equal regs[ra].intVal = ord(false) else: regs[ra].intVal = ord((regs[rb].node.kind == nkNilLit and regs[rc].node.kind == nkNilLit) or regs[rb].node == regs[rc].node) of opcEqNimNode: decodeBC(rkInt) regs[ra].intVal = ord(exprStructuralEquivalent(regs[rb].node, regs[rc].node, strictSymEquality=true)) of opcSameNodeType: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].node.typ.sameTypeOrNil regs[rc].node.typ) of opcXor: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].intVal != 
regs[rc].intVal) of opcNot: decodeB(rkInt) assert regs[rb].kind == rkInt regs[ra].intVal = 1 - regs[rb].intVal of opcUnaryMinusInt: decodeB(rkInt) assert regs[rb].kind == rkInt let val = regs[rb].intVal if val != int64.low: regs[ra].intVal = -val else: stackTrace(c, tos, pc, errOverOrUnderflow) of opcUnaryMinusFloat: decodeB(rkFloat) assert regs[rb].kind == rkFloat regs[ra].floatVal = -regs[rb].floatVal of opcBitnotInt: decodeB(rkInt) assert regs[rb].kind == rkInt regs[ra].intVal = not regs[rb].intVal of opcEqStr: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].node.strVal == regs[rc].node.strVal) of opcLeStr: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].node.strVal <= regs[rc].node.strVal) of opcLtStr: decodeBC(rkInt) regs[ra].intVal = ord(regs[rb].node.strVal < regs[rc].node.strVal) of opcLeSet: decodeBC(rkInt) regs[ra].intVal = ord(containsSets(c.config, regs[rb].node, regs[rc].node)) of opcEqSet: decodeBC(rkInt) regs[ra].intVal = ord(equalSets(c.config, regs[rb].node, regs[rc].node)) of opcLtSet: decodeBC(rkInt) let a = regs[rb].node let b = regs[rc].node regs[ra].intVal = ord(containsSets(c.config, a, b) and not equalSets(c.config, a, b)) of opcMulSet: decodeBC(rkNode) createSet(regs[ra]) move(regs[ra].node.sons, nimsets.intersectSets(c.config, regs[rb].node, regs[rc].node).sons) of opcPlusSet: decodeBC(rkNode) createSet(regs[ra]) move(regs[ra].node.sons, nimsets.unionSets(c.config, regs[rb].node, regs[rc].node).sons) of opcMinusSet: decodeBC(rkNode) createSet(regs[ra]) move(regs[ra].node.sons, nimsets.diffSets(c.config, regs[rb].node, regs[rc].node).sons) of opcSymdiffSet: decodeBC(rkNode) createSet(regs[ra]) move(regs[ra].node.sons, nimsets.symdiffSets(c.config, regs[rb].node, regs[rc].node).sons) of opcConcatStr: decodeBC(rkNode) createStr regs[ra] regs[ra].node.strVal = getstr(regs[rb]) for i in rb+1..rb+rc-1: regs[ra].node.strVal.add getstr(regs[i]) of opcAddStrCh: decodeB(rkNode) #createStrKeepNode regs[ra] regs[ra].node.strVal.add(regs[rb].intVal.chr) 
of opcAddStrStr: decodeB(rkNode) #createStrKeepNode regs[ra] regs[ra].node.strVal.add(regs[rb].node.strVal) of opcAddSeqElem: decodeB(rkNode) if regs[ra].node.kind == nkBracket: regs[ra].node.add(copyValue(regs[rb].regToNode)) else: stackTrace(c, tos, pc, errNilAccess) of opcGetImpl: decodeB(rkNode) let a = regs[rb].node if a.kind == nkSym: regs[ra].node = if a.sym.ast.isNil: newNode(nkNilLit) else: copyTree(a.sym.ast) regs[ra].node.flags.incl nfIsRef else: stackTrace(c, tos, pc, "node is not a symbol") of opcGetImplTransf: decodeB(rkNode) let a = regs[rb].node if a.kind == nkSym: regs[ra].node = if a.sym.ast.isNil: newNode(nkNilLit) else: let ast = a.sym.ast.shallowCopy for i in 0..<a.sym.ast.len: ast[i] = a.sym.ast[i] ast[bodyPos] = transformBody(c.graph, a.sym) ast.copyTree() of opcSymOwner: decodeB(rkNode) let a = regs[rb].node if a.kind == nkSym: regs[ra].node = if a.sym.owner.isNil: newNode(nkNilLit) else: newSymNode(a.sym.skipGenericOwner) regs[ra].node.flags.incl nfIsRef else: stackTrace(c, tos, pc, "node is not a symbol") of opcSymIsInstantiationOf: decodeBC(rkInt) let a = regs[rb].node let b = regs[rc].node if a.kind == nkSym and a.sym.kind in skProcKinds and b.kind == nkSym and b.sym.kind in skProcKinds: regs[ra].intVal = if sfFromGeneric in a.sym.flags and a.sym.owner == b.sym: 1 else: 0 else: stackTrace(c, tos, pc, "node is not a proc symbol") of opcEcho: let rb = instr.regB if rb == 1: msgWriteln(c.config, regs[ra].node.strVal, {msgStdout}) else: var outp = "" for i in ra..ra+rb-1: #if regs[i].kind != rkNode: debug regs[i] outp.add(regs[i].node.strVal) msgWriteln(c.config, outp, {msgStdout}) of opcContainsSet: decodeBC(rkInt) regs[ra].intVal = ord(inSet(regs[rb].node, regs[rc].regToNode)) of opcSubStr: decodeBC(rkNode) inc pc assert c.code[pc].opcode == opcSubStr let rd = c.code[pc].regA createStr regs[ra] regs[ra].node.strVal = substr(regs[rb].node.strVal, regs[rc].intVal.int, regs[rd].intVal.int) of opcParseFloat: decodeBC(rkInt) inc pc assert 
c.code[pc].opcode == opcParseFloat let rd = c.code[pc].regA var rcAddr = addr(regs[rc]) if rcAddr.kind == rkRegisterAddr: rcAddr = rcAddr.regAddr elif regs[rc].kind != rkFloat: myreset(regs[rc]) regs[rc].kind = rkFloat regs[ra].intVal = parseBiggestFloat(regs[rb].node.strVal, rcAddr.floatVal, regs[rd].intVal.int) of opcRangeChck: let rb = instr.regB let rc = instr.regC if not (leValueConv(regs[rb].regToNode, regs[ra].regToNode) and leValueConv(regs[ra].regToNode, regs[rc].regToNode)): stackTrace(c, tos, pc, errIllegalConvFromXtoY % [ $regs[ra].regToNode, "[" & $regs[rb].regToNode & ".." & $regs[rc].regToNode & "]"]) of opcIndCall, opcIndCallAsgn: # dest = call regStart, n; where regStart = fn, arg1, ... let rb = instr.regB let rc = instr.regC let bb = regs[rb].node let isClosure = bb.kind == nkTupleConstr let prc = if not isClosure: bb.sym else: bb.sons[0].sym if prc.offset < -1: # it's a callback: c.callbacks[-prc.offset-2].value( VmArgs(ra: ra, rb: rb, rc: rc, slots: cast[pointer](regs), currentException: c.currentExceptionB, currentLineInfo: c.debug[pc])) elif sfImportc in prc.flags: if allowFFI notin c.features: globalError(c.config, c.debug[pc], "VM not allowed to do FFI") # we pass 'tos.slots' instead of 'regs' so that the compiler can keep # 'regs' in a register: when hasFFI: let prcValue = c.globals.sons[prc.position-1] if prcValue.kind == nkEmpty: globalError(c.config, c.debug[pc], "cannot run " & prc.name.s) let newValue = callForeignFunction(prcValue, prc.typ, tos.slots, rb+1, rc-1, c.debug[pc]) if newValue.kind != nkEmpty: assert instr.opcode == opcIndCallAsgn putIntoReg(regs[ra], newValue) else: globalError(c.config, c.debug[pc], "VM not built with FFI support") elif prc.kind != skTemplate: let newPc = compile(c, prc) # tricky: a recursion is also a jump back, so we use the same # logic as for loops: if newPc < pc: handleJmpBack() #echo "new pc ", newPc, " calling: ", prc.name.s var newFrame = PStackFrame(prc: prc, comesFrom: pc, next: tos) 
newSeq(newFrame.slots, prc.offset+ord(isClosure)) if not isEmptyType(prc.typ.sons[0]) or prc.kind == skMacro: putIntoReg(newFrame.slots[0], getNullValue(prc.typ.sons[0], prc.info, c.config)) for i in 1 .. rc-1: newFrame.slots[i] = regs[rb+i] if isClosure: newFrame.slots[rc].kind = rkNode newFrame.slots[rc].node = regs[rb].node.sons[1] tos = newFrame move(regs, newFrame.slots) # -1 for the following 'inc pc' pc = newPc-1 else: # for 'getAst' support we need to support template expansion here: let genSymOwner = if tos.next != nil and tos.next.prc != nil: tos.next.prc else: c.module var macroCall = newNodeI(nkCall, c.debug[pc]) macroCall.add(newSymNode(prc)) for i in 1 .. rc-1: let node = regs[rb+i].regToNode node.info = c.debug[pc] macroCall.add(node) var a = evalTemplate(macroCall, prc, genSymOwner, c.config) if a.kind == nkStmtList and a.len == 1: a = a[0] a.recSetFlagIsRef ensureKind(rkNode) regs[ra].node = a of opcTJmp: # jump Bx if A != 0 let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc' if regs[ra].intVal != 0: inc pc, rbx of opcFJmp: # jump Bx if A == 0 let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc' if regs[ra].intVal == 0: inc pc, rbx of opcJmp: # jump Bx let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc' inc pc, rbx of opcJmpBack: let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc' inc pc, rbx handleJmpBack() of opcBranch: # we know the next instruction is a 'fjmp': let branch = c.constants[instr.regBx-wordExcess] var cond = false for j in countup(0, sonsLen(branch) - 2): if overlap(regs[ra].regToNode, branch.sons[j]): cond = true break assert c.code[pc+1].opcode == opcFJmp inc pc # we skip this instruction so that the final 'inc(pc)' skips # the following jump if not cond: let instr2 = c.code[pc] let rbx = instr2.regBx - wordExcess - 1 # -1 for the following 'inc pc' inc pc, rbx of opcTry: let rbx = instr.regBx - wordExcess tos.pushSafePoint(pc + rbx) assert 
c.code[pc+rbx].opcode in {opcExcept, opcFinally} of opcExcept: # just skip it; it's followed by a jump; # we'll execute in the 'raise' handler let rbx = instr.regBx - wordExcess - 1 # -1 for the following 'inc pc' inc pc, rbx while c.code[pc+1].opcode == opcExcept: let rbx = c.code[pc+1].regBx - wordExcess - 1 inc pc, rbx #assert c.code[pc+1].opcode in {opcExcept, opcFinally} if c.code[pc+1].opcode != opcFinally: # in an except handler there is no active safe point for the 'try': tos.popSafePoint() of opcFinally: # just skip it; it's followed by the code we need to execute anyway tos.popSafePoint() of opcFinallyEnd: if c.currentExceptionA != nil: # we are in a cleanup run: let (newPc, newTos) = cleanUpOnException(c, tos) if newPc-1 < 0: bailOut(c, tos) return pc = newPc-1 if tos != newTos: tos = newTos move(regs, tos.slots) of opcRaise: let raised = regs[ra].node c.currentExceptionA = raised c.exceptionInstr = pc let (newPc, newTos) = cleanUpOnException(c, tos) # -1 because of the following 'inc' if newPc-1 < 0: bailOut(c, tos) return pc = newPc-1 if tos != newTos: tos = newTos move(regs, tos.slots) of opcNew: ensureKind(rkNode) let typ = c.types[instr.regBx - wordExcess] regs[ra].node = getNullValue(typ, c.debug[pc], c.config) regs[ra].node.flags.incl nfIsRef of opcNewSeq: let typ = c.types[instr.regBx - wordExcess] inc pc ensureKind(rkNode) let instr2 = c.code[pc] let count = regs[instr2.regA].intVal.int regs[ra].node = newNodeI(nkBracket, c.debug[pc]) regs[ra].node.typ = typ newSeq(regs[ra].node.sons, count) for i in 0 ..< count: regs[ra].node.sons[i] = getNullValue(typ.sons[0], c.debug[pc], c.config) of opcNewStr: decodeB(rkNode) regs[ra].node = newNodeI(nkStrLit, c.debug[pc]) regs[ra].node.strVal = newString(regs[rb].intVal.int) of opcLdImmInt: # dest = immediate value decodeBx(rkInt) regs[ra].intVal = rbx of opcLdNull: ensureKind(rkNode) let typ = c.types[instr.regBx - wordExcess] regs[ra].node = getNullValue(typ, c.debug[pc], c.config) # opcLdNull really is 
the gist of the VM's problems: should it load # a fresh null to regs[ra].node or to regs[ra].node[]? This really # depends on whether regs[ra] represents the variable itself or wether # it holds the indirection! Due to the way registers are re-used we cannot # say for sure here! --> The codegen has to deal with it # via 'genAsgnPatch'. of opcLdNullReg: let typ = c.types[instr.regBx - wordExcess] if typ.skipTypes(abstractInst+{tyRange}-{tyTypeDesc}).kind in { tyFloat..tyFloat128}: ensureKind(rkFloat) regs[ra].floatVal = 0.0 else: ensureKind(rkInt) regs[ra].intVal = 0 of opcLdConst: let rb = instr.regBx - wordExcess let cnst = c.constants.sons[rb] if fitsRegister(cnst.typ): myreset(regs[ra]) putIntoReg(regs[ra], cnst) else: ensureKind(rkNode) regs[ra].node = cnst of opcAsgnConst: let rb = instr.regBx - wordExcess let cnst = c.constants.sons[rb] if fitsRegister(cnst.typ): putIntoReg(regs[ra], cnst) else: ensureKind(rkNode) regs[ra].node = cnst.copyTree of opcLdGlobal: let rb = instr.regBx - wordExcess - 1 ensureKind(rkNode) regs[ra].node = c.globals.sons[rb] of opcLdGlobalAddr: let rb = instr.regBx - wordExcess - 1 ensureKind(rkNodeAddr) regs[ra].nodeAddr = addr(c.globals.sons[rb]) of opcRepr: decodeB(rkNode) createStr regs[ra] regs[ra].node.strVal = renderTree(regs[rb].regToNode, {renderNoComments, renderDocComments}) of opcQuit: if c.mode in {emRepl, emStaticExpr, emStaticStmt}: message(c.config, c.debug[pc], hintQuitCalled) msgQuit(int8(getOrdValue(regs[ra].regToNode))) else: return TFullReg(kind: rkNone) of opcSetLenStr: decodeB(rkNode) #createStrKeepNode regs[ra] regs[ra].node.strVal.setLen(regs[rb].intVal.int) of opcOf: decodeBC(rkInt) let typ = c.types[regs[rc].intVal.int] regs[ra].intVal = ord(inheritanceDiff(regs[rb].node.typ, typ) <= 0) of opcIs: decodeBC(rkInt) let t1 = regs[rb].node.typ.skipTypes({tyTypeDesc}) let t2 = c.types[regs[rc].intVal.int] # XXX: This should use the standard isOpImpl let match = if t2.kind == tyUserTypeClass: true else: 
sameType(t1, t2) regs[ra].intVal = ord(match) of opcSetLenSeq: decodeB(rkNode) let newLen = regs[rb].intVal.int if regs[ra].node.isNil: stackTrace(c, tos, pc, errNilAccess) else: c.setLenSeq(regs[ra].node, newLen, c.debug[pc]) of opcReset: internalError(c.config, c.debug[pc], "too implement") of opcNarrowS: decodeB(rkInt) let min = -(1.BiggestInt shl (rb-1)) let max = (1.BiggestInt shl (rb-1))-1 if regs[ra].intVal < min or regs[ra].intVal > max: stackTrace(c, tos, pc, "unhandled exception: value out of range") of opcNarrowU: decodeB(rkInt) regs[ra].intVal = regs[ra].intVal and ((1'i64 shl rb)-1) of opcIsNil: decodeB(rkInt) let node = regs[rb].node regs[ra].intVal = ord( # Note that `nfIsRef` + `nkNilLit` represents an allocated # reference with the value `nil`, so `isNil` should be false! (node.kind == nkNilLit and nfIsRef notin node.flags) or (not node.typ.isNil and node.typ.kind == tyProc and node.typ.callConv == ccClosure and node.sons[0].kind == nkNilLit and node.sons[1].kind == nkNilLit)) of opcNBindSym: # cannot use this simple check # if dynamicBindSym notin c.config.features: # bindSym with static input decodeBx(rkNode) regs[ra].node = copyTree(c.constants.sons[rbx]) regs[ra].node.flags.incl nfIsRef of opcNDynBindSym: # experimental bindSym let rb = instr.regB rc = instr.regC idx = int(regs[rb+rc-1].intVal) callback = c.callbacks[idx].value args = VmArgs(ra: ra, rb: rb, rc: rc, slots: cast[pointer](regs), currentException: c.currentExceptionB, currentLineInfo: c.debug[pc]) callback(args) regs[ra].node.flags.incl nfIsRef of opcNChild: decodeBC(rkNode) let idx = regs[rc].intVal.int let src = regs[rb].node if src.kind notin {nkEmpty..nkNilLit} and idx <% src.len: regs[ra].node = src.sons[idx] else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcNSetChild: decodeBC(rkNode) let idx = regs[rb].intVal.int var dest = regs[ra].node if dest.kind notin {nkEmpty..nkNilLit} and idx <% dest.len: dest.sons[idx] = regs[rc].node else: stackTrace(c, tos, pc, 
errIndexOutOfBounds) of opcNAdd: decodeBC(rkNode) var u = regs[rb].node if u.kind notin {nkEmpty..nkNilLit}: u.add(regs[rc].node) else: stackTrace(c, tos, pc, "cannot add to node kind: " & $u.kind) regs[ra].node = u of opcNAddMultiple: decodeBC(rkNode) let x = regs[rc].node var u = regs[rb].node if u.kind notin {nkEmpty..nkNilLit}: # XXX can be optimized: for i in 0..<x.len: u.add(x.sons[i]) else: stackTrace(c, tos, pc, "cannot add to node kind: " & $u.kind) regs[ra].node = u of opcNKind: decodeB(rkInt) regs[ra].intVal = ord(regs[rb].node.kind) c.comesFromHeuristic = regs[rb].node.info of opcNSymKind: decodeB(rkInt) let a = regs[rb].node if a.kind == nkSym: regs[ra].intVal = ord(a.sym.kind) else: stackTrace(c, tos, pc, "node is not a symbol") c.comesFromHeuristic = regs[rb].node.info of opcNIntVal: decodeB(rkInt) let a = regs[rb].node case a.kind of nkCharLit..nkUInt64Lit: regs[ra].intVal = a.intVal else: stackTrace(c, tos, pc, errFieldXNotFound & "intVal") of opcNFloatVal: decodeB(rkFloat) let a = regs[rb].node case a.kind of nkFloatLit..nkFloat64Lit: regs[ra].floatVal = a.floatVal else: stackTrace(c, tos, pc, errFieldXNotFound & "floatVal") of opcNSymbol: decodeB(rkNode) let a = regs[rb].node if a.kind == nkSym: regs[ra].node = copyNode(a) else: stackTrace(c, tos, pc, errFieldXNotFound & "symbol") of opcNIdent: decodeB(rkNode) let a = regs[rb].node if a.kind == nkIdent: regs[ra].node = copyNode(a) else: stackTrace(c, tos, pc, errFieldXNotFound & "ident") of opcNGetType: let rb = instr.regB let rc = instr.regC case rc: of 0: # getType opcode: ensureKind(rkNode) if regs[rb].kind == rkNode and regs[rb].node.typ != nil: regs[ra].node = opMapTypeToAst(c.cache, regs[rb].node.typ, c.debug[pc]) elif regs[rb].kind == rkNode and regs[rb].node.kind == nkSym and regs[rb].node.sym.typ != nil: regs[ra].node = opMapTypeToAst(c.cache, regs[rb].node.sym.typ, c.debug[pc]) else: stackTrace(c, tos, pc, "node has no type") of 1: # typeKind opcode: ensureKind(rkInt) if regs[rb].kind 
== rkNode and regs[rb].node.typ != nil: regs[ra].intVal = ord(regs[rb].node.typ.kind) elif regs[rb].kind == rkNode and regs[rb].node.kind == nkSym and regs[rb].node.sym.typ != nil: regs[ra].intVal = ord(regs[rb].node.sym.typ.kind) #else: # stackTrace(c, tos, pc, "node has no type") of 2: # getTypeInst opcode: ensureKind(rkNode) if regs[rb].kind == rkNode and regs[rb].node.typ != nil: regs[ra].node = opMapTypeInstToAst(c.cache, regs[rb].node.typ, c.debug[pc]) elif regs[rb].kind == rkNode and regs[rb].node.kind == nkSym and regs[rb].node.sym.typ != nil: regs[ra].node = opMapTypeInstToAst(c.cache, regs[rb].node.sym.typ, c.debug[pc]) else: stackTrace(c, tos, pc, "node has no type") else: # getTypeImpl opcode: ensureKind(rkNode) if regs[rb].kind == rkNode and regs[rb].node.typ != nil: regs[ra].node = opMapTypeImplToAst(c.cache, regs[rb].node.typ, c.debug[pc]) elif regs[rb].kind == rkNode and regs[rb].node.kind == nkSym and regs[rb].node.sym.typ != nil: regs[ra].node = opMapTypeImplToAst(c.cache, regs[rb].node.sym.typ, c.debug[pc]) else: stackTrace(c, tos, pc, "node has no type") of opcNStrVal: decodeB(rkNode) createStr regs[ra] let a = regs[rb].node case a.kind of {nkStrLit..nkTripleStrLit}: regs[ra].node.strVal = a.strVal of nkCommentStmt: regs[ra].node.strVal = a.comment of nkIdent: regs[ra].node.strVal = a.ident.s of nkSym: regs[ra].node.strVal = a.sym.name.s else: stackTrace(c, tos, pc, errFieldXNotFound & "strVal") of opcSlurp: decodeB(rkNode) createStr regs[ra] regs[ra].node.strVal = opSlurp(regs[rb].node.strVal, c.debug[pc], c.module, c.config) of opcGorge: when defined(nimcore): decodeBC(rkNode) inc pc let rd = c.code[pc].regA createStr regs[ra] regs[ra].node.strVal = opGorge(regs[rb].node.strVal, regs[rc].node.strVal, regs[rd].node.strVal, c.debug[pc], c.config)[0] else: globalError(c.config, c.debug[pc], "VM is not built with 'gorge' support") of opcNError, opcNWarning, opcNHint: decodeB(rkNode) let a = regs[ra].node let b = regs[rb].node let info = if b.kind 
== nkNilLit: c.debug[pc] else: b.info if instr.opcode == opcNError: stackTrace(c, tos, pc, a.strVal, info) elif instr.opcode == opcNWarning: message(c.config, info, warnUser, a.strVal) elif instr.opcode == opcNHint: message(c.config, info, hintUser, a.strVal) of opcParseExprToAst: decodeB(rkNode) # c.debug[pc].line.int - countLines(regs[rb].strVal) ? var error: string let ast = parseString(regs[rb].node.strVal, c.cache, c.config, toFullPath(c.config, c.debug[pc]), c.debug[pc].line.int, proc (conf: ConfigRef; info: TLineInfo; msg: TMsgKind; arg: string) = if error.len == 0 and msg <= errMax: error = formatMsg(conf, info, msg, arg)) if error.len > 0: c.errorFlag = error elif sonsLen(ast) != 1: c.errorFlag = formatMsg(c.config, c.debug[pc], errGenerated, "expected expression, but got multiple statements") else: regs[ra].node = ast.sons[0] of opcParseStmtToAst: decodeB(rkNode) var error: string let ast = parseString(regs[rb].node.strVal, c.cache, c.config, toFullPath(c.config, c.debug[pc]), c.debug[pc].line.int, proc (conf: ConfigRef; info: TLineInfo; msg: TMsgKind; arg: string) = if error.len == 0 and msg <= errMax: error = formatMsg(conf, info, msg, arg)) if error.len > 0: c.errorFlag = error else: regs[ra].node = ast of opcQueryErrorFlag: createStr regs[ra] regs[ra].node.strVal = c.errorFlag c.errorFlag.setLen 0 of opcCallSite: ensureKind(rkNode) if c.callsite != nil: regs[ra].node = c.callsite else: stackTrace(c, tos, pc, errFieldXNotFound & "callsite") of opcNGetLineInfo: decodeBImm(rkNode) let n = regs[rb].node case imm of 0: # getFile regs[ra].node = newStrNode(nkStrLit, toFullPath(c.config, n.info)) of 1: # getLine regs[ra].node = newIntNode(nkIntLit, n.info.line.int) of 2: # getColumn regs[ra].node = newIntNode(nkIntLit, n.info.col) else: internalAssert c.config, false regs[ra].node.info = n.info regs[ra].node.typ = n.typ of opcNSetLineInfo: decodeB(rkNode) regs[ra].node.info = regs[rb].node.info of opcEqIdent: decodeBC(rkInt) # aliases for shorter and easier 
to understand code below let aNode = regs[rb].node let bNode = regs[rc].node # these are cstring to prevent string copy, and cmpIgnoreStyle from # takes cstring arguments var aStrVal: cstring = nil var bStrVal: cstring = nil # extract strVal from argument ``a`` case aNode.kind of {nkStrLit..nkTripleStrLit}: aStrVal = aNode.strVal.cstring of nkIdent: aStrVal = aNode.ident.s.cstring of nkSym: aStrVal = aNode.sym.name.s.cstring of nkOpenSymChoice, nkClosedSymChoice: aStrVal = aNode[0].sym.name.s.cstring else: discard # extract strVal from argument ``b`` case bNode.kind of {nkStrLit..nkTripleStrLit}: bStrVal = bNode.strVal.cstring of nkIdent: bStrVal = bNode.ident.s.cstring of nkSym: bStrVal = bNode.sym.name.s.cstring of nkOpenSymChoice, nkClosedSymChoice: bStrVal = bNode[0].sym.name.s.cstring else: discard # set result regs[ra].intVal = if aStrVal != nil and bStrVal != nil: ord(idents.cmpIgnoreStyle(aStrVal,bStrVal,high(int)) == 0) else: 0 of opcStrToIdent: decodeB(rkNode) if regs[rb].node.kind notin {nkStrLit..nkTripleStrLit}: stackTrace(c, tos, pc, errFieldXNotFound & "strVal") else: regs[ra].node = newNodeI(nkIdent, c.debug[pc]) regs[ra].node.ident = getIdent(c.cache, regs[rb].node.strVal) regs[ra].node.flags.incl nfIsRef of opcSetType: if regs[ra].kind != rkNode: internalError(c.config, c.debug[pc], "cannot set type") regs[ra].node.typ = c.types[instr.regBx - wordExcess] of opcConv: let rb = instr.regB inc pc let desttyp = c.types[c.code[pc].regBx - wordExcess] inc pc let srctyp = c.types[c.code[pc].regBx - wordExcess] if opConv(c, regs[ra], regs[rb], desttyp, srctyp): stackTrace(c, tos, pc, errIllegalConvFromXtoY % [ typeToString(srctyp), typeToString(desttyp)]) of opcCast: let rb = instr.regB inc pc let desttyp = c.types[c.code[pc].regBx - wordExcess] inc pc let srctyp = c.types[c.code[pc].regBx - wordExcess] when hasFFI: let dest = fficast(regs[rb], desttyp) asgnRef(regs[ra], dest) else: globalError(c.config, c.debug[pc], "cannot evaluate cast") of 
opcNSetIntVal: decodeB(rkNode) var dest = regs[ra].node if dest.kind in {nkCharLit..nkUInt64Lit} and regs[rb].kind in {rkInt}: dest.intVal = regs[rb].intVal else: stackTrace(c, tos, pc, errFieldXNotFound & "intVal") of opcNSetFloatVal: decodeB(rkNode) var dest = regs[ra].node if dest.kind in {nkFloatLit..nkFloat64Lit} and regs[rb].kind in {rkFloat}: dest.floatVal = regs[rb].floatVal else: stackTrace(c, tos, pc, errFieldXNotFound & "floatVal") of opcNSetSymbol: decodeB(rkNode) var dest = regs[ra].node if dest.kind == nkSym and regs[rb].node.kind == nkSym: dest.sym = regs[rb].node.sym else: stackTrace(c, tos, pc, errFieldXNotFound & "symbol") of opcNSetIdent: decodeB(rkNode) var dest = regs[ra].node if dest.kind == nkIdent and regs[rb].node.kind == nkIdent: dest.ident = regs[rb].node.ident else: stackTrace(c, tos, pc, errFieldXNotFound & "ident") of opcNSetType: decodeB(rkNode) let b = regs[rb].node internalAssert c.config, b.kind == nkSym and b.sym.kind == skType internalAssert c.config, regs[ra].node != nil regs[ra].node.typ = b.sym.typ of opcNSetStrVal: decodeB(rkNode) var dest = regs[ra].node if dest.kind in {nkStrLit..nkTripleStrLit} and regs[rb].kind in {rkNode}: dest.strVal = regs[rb].node.strVal elif dest.kind == nkCommentStmt and regs[rb].kind in {rkNode}: dest.comment = regs[rb].node.strVal else: stackTrace(c, tos, pc, errFieldXNotFound & "strVal") of opcNNewNimNode: decodeBC(rkNode) var k = regs[rb].intVal if k < 0 or k > ord(high(TNodeKind)): internalError(c.config, c.debug[pc], "request to create a NimNode of invalid kind") let cc = regs[rc].node let x = newNodeI(TNodeKind(int(k)), if cc.kind != nkNilLit: cc.info elif c.comesFromHeuristic.line != 0'u16: c.comesFromHeuristic elif c.callsite != nil and c.callsite.safeLen > 1: c.callsite[1].info else: c.debug[pc]) x.flags.incl nfIsRef # prevent crashes in the compiler resulting from wrong macros: if x.kind == nkIdent: x.ident = c.cache.emptyIdent regs[ra].node = x of opcNCopyNimNode: decodeB(rkNode) 
regs[ra].node = copyNode(regs[rb].node) of opcNCopyNimTree: decodeB(rkNode) regs[ra].node = copyTree(regs[rb].node) of opcNDel: decodeBC(rkNode) let bb = regs[rb].intVal.int for i in countup(0, regs[rc].intVal.int-1): delSon(regs[ra].node, bb) of opcGenSym: decodeBC(rkNode) let k = regs[rb].intVal let name = if regs[rc].node.strVal.len == 0: ":tmp" else: regs[rc].node.strVal if k < 0 or k > ord(high(TSymKind)): internalError(c.config, c.debug[pc], "request to create symbol of invalid kind") var sym = newSym(k.TSymKind, getIdent(c.cache, name), c.module.owner, c.debug[pc]) incl(sym.flags, sfGenSym) regs[ra].node = newSymNode(sym) regs[ra].node.flags.incl nfIsRef of opcNccValue: decodeB(rkInt) let destKey = regs[rb].node.strVal regs[ra].intVal = getOrDefault(c.graph.cacheCounters, destKey) of opcNccInc: let g = c.graph let destKey = regs[ra].node.strVal let by = regs[instr.regB].intVal let v = getOrDefault(g.cacheCounters, destKey) g.cacheCounters[destKey] = v+by recordInc(c, c.debug[pc], destKey, by) of opcNcsAdd: let g = c.graph let destKey = regs[ra].node.strVal let val = regs[instr.regB].node if not contains(g.cacheSeqs, destKey): g.cacheSeqs[destKey] = newTree(nkStmtList, val) # newNodeI(nkStmtList, c.debug[pc]) else: g.cacheSeqs[destKey].add val recordAdd(c, c.debug[pc], destKey, val) of opcNcsIncl: let g = c.graph let destKey = regs[ra].node.strVal let val = regs[instr.regB].node if not contains(g.cacheSeqs, destKey): g.cacheSeqs[destKey] = newTree(nkStmtList, val) else: block search: for existing in g.cacheSeqs[destKey]: if exprStructuralEquivalent(existing, val, strictSymEquality=true): break search g.cacheSeqs[destKey].add val recordIncl(c, c.debug[pc], destKey, val) of opcNcsLen: let g = c.graph decodeB(rkInt) let destKey = regs[rb].node.strVal regs[ra].intVal = if contains(g.cacheSeqs, destKey): g.cacheSeqs[destKey].len else: 0 of opcNcsAt: let g = c.graph decodeBC(rkNode) let idx = regs[rc].intVal let destKey = regs[rb].node.strVal if 
contains(g.cacheSeqs, destKey) and idx <% g.cacheSeqs[destKey].len: regs[ra].node = g.cacheSeqs[destKey][idx.int] else: stackTrace(c, tos, pc, errIndexOutOfBounds) of opcNctPut: let g = c.graph let destKey = regs[ra].node.strVal let key = regs[instr.regB].node.strVal let val = regs[instr.regC].node if not contains(g.cacheTables, destKey): g.cacheTables[destKey] = initBTree[string, PNode]() if not contains(g.cacheTables[destKey], key): g.cacheTables[destKey].add(key, val) recordPut(c, c.debug[pc], destKey, key, val) else: stackTrace(c, tos, pc, "key already exists: " & key) of opcNctLen: let g = c.graph decodeB(rkInt) let destKey = regs[rb].node.strVal regs[ra].intVal = if contains(g.cacheTables, destKey): g.cacheTables[destKey].len else: 0 of opcNctGet: let g = c.graph decodeBC(rkNode) let destKey = regs[rb].node.strVal let key = regs[rc].node.strVal if contains(g.cacheTables, destKey): if contains(g.cacheTables[destKey], key): regs[ra].node = getOrDefault(g.cacheTables[destKey], key) else: stackTrace(c, tos, pc, "key does not exist: " & key) else: stackTrace(c, tos, pc, "key does not exist: " & destKey) of opcNctHasNext: let g = c.graph decodeBC(rkInt) let destKey = regs[rb].node.strVal regs[ra].intVal = if g.cacheTables.contains(destKey): ord(btrees.hasNext(g.cacheTables[destKey], regs[rc].intVal.int)) else: 0 of opcNctNext: let g = c.graph decodeBC(rkNode) let destKey = regs[rb].node.strVal let index = regs[rc].intVal if contains(g.cacheTables, destKey): let (k, v, nextIndex) = btrees.next(g.cacheTables[destKey], index.int) regs[ra].node = newTree(nkTupleConstr, newStrNode(k, c.debug[pc]), v, newIntNode(nkIntLit, nextIndex)) else: stackTrace(c, tos, pc, "key does not exist: " & destKey) of opcTypeTrait: # XXX only supports 'name' for now; we can use regC to encode the # type trait operation decodeB(rkNode) var typ = regs[rb].node.typ internalAssert c.config, typ != nil while typ.kind == tyTypeDesc and typ.len > 0: typ = typ.sons[0] createStr regs[ra] 
regs[ra].node.strVal = typ.typeToString(preferExported) of opcMarshalLoad: let ra = instr.regA let rb = instr.regB inc pc let typ = c.types[c.code[pc].regBx - wordExcess] putIntoReg(regs[ra], loadAny(regs[rb].node.strVal, typ, c.cache, c.config)) of opcMarshalStore: decodeB(rkNode) inc pc let typ = c.types[c.code[pc].regBx - wordExcess] createStrKeepNode(regs[ra]) when not defined(nimNoNilSeqs): if regs[ra].node.strVal.isNil: regs[ra].node.strVal = newStringOfCap(1000) storeAny(regs[ra].node.strVal, typ, regs[rb].regToNode, c.config) of opcToNarrowInt: decodeBC(rkInt) let mask = (1'i64 shl rc) - 1 # 0xFF let signbit = 1'i64 shl (rc - 1) # 0x80 let toggle = mask - signbit # 0x7F # algorithm: -((i8 and 0xFF) xor 0x7F) + 0x7F # mask off higher bits. # uses two's complement to sign-extend integer. # reajust integer into desired range. regs[ra].intVal = -((regs[rb].intVal and mask) xor toggle) + toggle inc pc

proc execute(c: PCtx, start: int): PNode =
  ## Runs the instructions beginning at `start` in a fresh stack frame that
  ## has no owning routine (`prc: nil`) and `c.prc.maxSlots` register slots.
  ## Returns the register produced by `rawExecute`, converted to a node.
  var tos = PStackFrame(prc: nil, comesFrom: 0, next: nil)
  newSeq(tos.slots, c.prc.maxSlots)
  result = rawExecute(c, start, tos).regToNode

proc execProc*(c: PCtx; sym: PSym; args: openArray[PNode]): PNode =
  ## Generates code for the routine `sym` and executes it with `args` as
  ## its parameters.
  ##
  ## A local error is reported (and `result` is left at its default) when
  ## `sym` is not a routine or when `args.len` differs from the number of
  ## formal parameters (`sym.typ.len-1`).
  if sym.kind in routineKinds:
    if sym.typ.len-1 != args.len:
      localError(c.config, sym.info,
        "NimScript: expected $# arguments, but got $#" % [
        $(sym.typ.len-1), $args.len])
    else:
      let start = genProc(c, sym)

      var tos = PStackFrame(prc: sym, comesFrom: 0, next: nil)
      let maxSlots = sym.offset
      newSeq(tos.slots, maxSlots)

      # setup parameters:
      # slot 0 is pre-filled with the null value of the return type when the
      # routine has a non-empty return type or is a macro.
      if not isEmptyType(sym.typ.sons[0]) or sym.kind == skMacro:
        putIntoReg(tos.slots[0], getNullValue(sym.typ.sons[0], sym.info, c.config))
      # XXX We could perform some type checking here.
      for i in 1..<sym.typ.len:
        putIntoReg(tos.slots[i], args[i-1])

      result = rawExecute(c, start, tos).regToNode
  else:
    localError(c.config, sym.info,
      "NimScript: attempt to call non-routine: " & sym.name.s)

proc evalStmt*(c: PCtx, n: PNode) =
  ## Transforms `n`, generates code for it as a statement and executes the
  ## newly generated instructions, discarding the result.
  let n = transformExpr(c.graph, c.module, n, noDestructors = true)
  let start = genStmt(c, n)
  # execute new instructions; this redundant opcEof check saves us lots
  # of allocations in 'execute':
  if c.code[start].opcode != opcEof:
    discard execute(c, start)

proc evalExpr*(c: PCtx, n: PNode): PNode =
  ## Transforms `n`, generates code for it as an expression and returns the
  ## node produced by executing that code. The generated code is asserted
  ## to be non-empty.
  let n = transformExpr(c.graph, c.module, n, noDestructors = true)
  let start = genExpr(c, n)
  assert c.code[start].opcode != opcEof
  result = execute(c, start)

proc getGlobalValue*(c: PCtx; s: PSym): PNode =
  ## Returns the node stored for the global `let`/`var` symbol `s`;
  ## globals are indexed by `s.position-1`.
  internalAssert c.config, s.kind in {skLet, skVar} and sfGlobal in s.flags
  result = c.globals.sons[s.position-1]

include vmops

proc setupGlobalCtx*(module: PSym; graph: ModuleGraph) =
  ## Makes sure `graph.vm` holds a VM context for `module`: creates one and
  ## registers the additional ops on first use, otherwise refreshes the
  ## existing context for `module`.
  if graph.vm.isNil:
    graph.vm = newCtx(module, graph.cache, graph)
    registerAdditionalOps(PCtx graph.vm)
  else:
    refresh(PCtx graph.vm, module)

proc myOpen(graph: ModuleGraph; module: PSym): PPassContext =
  ## Pass 'open' callback: sets up the shared VM context and returns it as
  ## the pass context.
  #var c = newEvalContext(module, emRepl)
  #c.features = {allowCast, allowFFI, allowInfiniteLoops}
  #pushStackFrame(c, newStackFrame())

  # XXX produce a new 'globals' environment here:
  setupGlobalCtx(module, graph)
  result = PCtx graph.vm
  when hasFFI:
    PCtx(graph.vm).features = {allowFFI, allowCast}

proc myProcess(c: PPassContext, n: PNode): PNode =
  ## Pass 'process' callback: evaluates `n` and returns an empty node, unless
  ## new errors were counted since the previous call, in which case `n` is
  ## returned unevaluated.
  let c = PCtx(c)
  # don't eval erroneous code:
  if c.oldErrorCount == c.config.errorCounter:
    evalStmt(c, n)
    result = newNodeI(nkEmpty, n.info)
  else:
    result = n
  c.oldErrorCount = c.config.errorCounter

proc myClose(graph: ModuleGraph; c: PPassContext, n: PNode): PNode =
  ## Pass 'close' callback: processes the final node like any other.
  myProcess(c, n)

const evalPass* = makePass(myOpen, myProcess, myClose)

proc evalConstExprAux(module: PSym;
                      g: ModuleGraph; prc: PSym, n: PNode,
                      mode: TEvalMode): PNode =
  ## Shared implementation of the compile-time evaluation entry points.
  ## Evaluates `n` in the graph's VM context with the context temporarily
  ## switched to `mode`; the previous mode is restored on exit via `defer`.
  ## Returns an empty node when no code was generated for `n`.
  let n = transformExpr(g, module, n, noDestructors = true)
  setupGlobalCtx(module, g)
  var c = PCtx g.vm
  let oldMode = c.mode
  defer: c.mode = oldMode
  c.mode = mode
  let start = genExpr(c, n, requiresValue = mode!=emStaticStmt)
  # nothing to execute; past this point the code is known to be non-empty
  if c.code[start].opcode == opcEof: return newNodeI(nkEmpty, n.info)
  when debugEchoCode: c.echoCode start
  var tos = PStackFrame(prc: prc, comesFrom: 0, next: nil)
  newSeq(tos.slots, c.prc.maxSlots)
  #for i in 0 ..< c.prc.maxSlots: tos.slots[i] = newNode(nkEmpty)
  result = rawExecute(c, start, tos).regToNode
  # fall back to the line info of `n` when the result's column is negative
  if result.info.col < 0: result.info = n.info

proc evalConstExpr*(module: PSym; g: ModuleGraph; e: PNode): PNode =
  ## Evaluates `e` in `emConst` mode.
  result = evalConstExprAux(module, g, nil, e, emConst)

proc evalStaticExpr*(module: PSym; g: ModuleGraph; e: PNode, prc: PSym): PNode =
  ## Evaluates `e` in `emStaticExpr` mode with `prc` as the frame's routine.
  result = evalConstExprAux(module, g, prc, e, emStaticExpr)

proc evalStaticStmt*(module: PSym; g: ModuleGraph; e: PNode, prc: PSym) =
  ## Evaluates `e` in `emStaticStmt` mode and discards the result.
  discard evalConstExprAux(module, g, prc, e, emStaticStmt)

proc setupCompileTimeVar*(module: PSym; g: ModuleGraph; n: PNode) =
  ## Evaluates `n` in `emStaticStmt` mode without an owning routine and
  ## discards the result.
  discard evalConstExprAux(module, g, nil, n, emStaticStmt)

proc setupMacroParam(x: PNode, typ: PType): TFullReg =
  ## Builds the register that passes argument `x` to a macro parameter of
  ## type `typ`. `static` and `typedesc` parameters go through `putIntoReg`;
  ## everything else is passed as a node register.
  case typ.kind
  of tyStatic:
    putIntoReg(result, x)
  of tyTypeDesc:
    putIntoReg(result, x)
  else:
    result.kind = rkNode
    var n = x
    # strip a hidden conversion wrapper before canonicalizing
    if n.kind in {nkHiddenSubConv, nkHiddenStdConv}: n = n.sons[1]
    n = n.canonValue
    n.flags.incl nfIsRef
    n.typ = x.typ
    result.node = n

iterator genericParamsInMacroCall*(macroSym: PSym, call: PNode): (PSym, PNode) =
  ## Yields each generic parameter symbol of `macroSym` together with the
  ## child of `call` found at index `macroSym.typ.len + i`.
  let gp = macroSym.ast[genericParamsPos]
  for i in 0 ..< gp.len:
    let genericParam = gp[i].sym
    let posInCall = macroSym.typ.len + i
    yield (genericParam, call[posInCall])

# to prevent endless recursion in macro instantiation
const evalMacroLimit = 1000

proc evalMacroCall*(module: PSym; g: ModuleGraph;
                    n, nOrig: PNode, sym: PSym): PNode =
  ## Executes the macro `sym` for the call `n` and returns the resulting
  ## tree. `nOrig` is stored as the context's `callsite` while the macro
  ## runs.
  ##
  ## Raises a global error when the nesting counter exceeds
  ## `evalMacroLimit`, when `n` has fewer children than `sym.typ.len`, when
  ## an `.immediate` macro has a `static`/`typedesc` generic parameter, or
  ## when the macro produced a cyclic tree.
  # XXX globalError() is ugly here, but I don't know a better solution for now
  inc(g.config.evalMacroCounter)
  if g.config.evalMacroCounter > evalMacroLimit:
    globalError(g.config, n.info, "macro instantiation too nested")

  # immediate macros can bypass any type and arity checking so we check the
  # arity here too:
  if sym.typ.len > n.safeLen and sym.typ.len > 1:
    globalError(g.config, n.info, "in call '$#' got $#, but expected $# argument(s)" % [
        n.renderTree, $(n.safeLen-1), $(sym.typ.len-1)])

  setupGlobalCtx(module, g)
  var c = PCtx g.vm
  c.comesFromHeuristic.line = 0'u16

  c.callsite = nOrig
  let start = genProc(c, sym)

  var tos = PStackFrame(prc: sym, comesFrom: 0, next: nil)
  let maxSlots = sym.offset
  newSeq(tos.slots, maxSlots)
  # setup arguments:
  var L = n.safeLen
  if L == 0: L = 1
  # This is wrong for tests/reject/tind1.nim where the passed 'else' part
  # doesn't end up in the parameter:
  #InternalAssert tos.slots.len >= L

  # return value:
  tos.slots[0].kind = rkNode
  tos.slots[0].node = newNodeI(nkEmpty, n.info)

  # setup parameters:
  for i in 1..<sym.typ.len:
    tos.slots[i] = setupMacroParam(n.sons[i], sym.typ.sons[i])

  # generic parameters are passed in the slots after the regular ones
  let gp = sym.ast[genericParamsPos]
  for i in 0 ..< gp.len:
    if sfImmediate notin sym.flags:
      let idx = sym.typ.len + i
      if idx < n.len:
        tos.slots[idx] = setupMacroParam(n.sons[idx], gp[i].sym.typ)
      else:
        # NOTE(review): if `localError` returns normally, execution continues
        # and the counter is decremented again at the end of this proc --
        # confirm whether that is intended.
        dec(g.config.evalMacroCounter)
        c.callsite = nil
        localError(c.config, n.info, "expected " & $gp.len &
                   " generic parameter(s)")
    elif gp[i].sym.typ.kind in {tyStatic, tyTypeDesc}:
      dec(g.config.evalMacroCounter)
      c.callsite = nil
      globalError(c.config, n.info, "static[T] or typedesc not supported for .immediate macros")
  # temporary storage:
  #for i in L ..< maxSlots: tos.slots[i] = newNode(nkEmpty)
  result = rawExecute(c, start, tos).regToNode
  # NOTE(review): `line` is assigned `0'u16` above; if the field is unsigned
  # this condition can never hold (evalConstExprAux tests `col` instead).
  if result.info.line < 0: result.info = n.info
  if cyclicTree(result):
    globalError(c.config, n.info, "macro produced a cyclic tree")
  dec(g.config.evalMacroCounter)
  c.callsite = nil