diff options
Diffstat (limited to 'WWW/Library/Implementation')
-rw-r--r-- | WWW/Library/Implementation/HTChunk.c | 13 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTFTP.c | 28 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTFile.c | 16 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMIME.c | 12 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTML.h | 44 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 5 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTNews.c | 55 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTParse.c | 9 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTParse.h | 6 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTPlain.c | 226 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTString.c | 7 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTString.h | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTTP.c | 36 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 523 | ||||
-rw-r--r-- | WWW/Library/Implementation/UCAux.h | 14 | ||||
-rw-r--r-- | WWW/Library/Implementation/UCMap.h | 5 |
16 files changed, 696 insertions, 307 deletions
diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c index 497e5173..fd66a482 100644 --- a/WWW/Library/Implementation/HTChunk.c +++ b/WWW/Library/Implementation/HTChunk.c @@ -116,14 +116,15 @@ PUBLIC void HTChunkPutb ARGS3 (HTChunk *,ch, CONST char *,b, int,l) ch->size += l; } -#ifdef EXP_CHARTRANS - #define PUTC(code) ch->data[ch->size++] = (char)(code) #define PUTC2(code) ch->data[ch->size++] = (char)(0x80|(0x3f &(code))) -PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code) +PUBLIC void HTChunkPutUtf8Char ARGS2( + HTChunk *, ch, + UCode_t, code) { int utflen; + if (code < 128) utflen = 1; else if (code < 0x800L) { @@ -148,7 +149,7 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code) outofmem(__FILE__, "HTChunkPutUtf8Char"); } - switch(utflen) { + switch (utflen) { case 0: return; case 1: @@ -169,7 +170,7 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code) case 6: PUTC(0xfc | (code>>30)); } - switch(utflen) { + switch (utflen) { case 6: PUTC2(code>>24); case 5: @@ -183,8 +184,6 @@ PUBLIC void HTChunkPutUtf8Char ARGS2 (HTChunk *,ch, UCode_t,code) } } -#endif /* EXP_CHARTRANS */ - /* Terminate a chunk ** ----------------- */ diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c index 0e5b801b..701490e9 100644 --- a/WWW/Library/Implementation/HTFTP.c +++ b/WWW/Library/Implementation/HTFTP.c @@ -759,21 +759,21 @@ PRIVATE int get_connection ARGS2( } server_type = GENERIC_SERVER; /* reset */ if (status == 2) { /* Send username */ - { - char *cp; /* look at greeting text */ - if (strlen(response_text) > 4) { - if ((cp = strstr(response_text, " awaits your command")) || - (cp = strstr(response_text, " ready."))) { - *cp = '\0'; - } - cp = response_text + 4; - if (!strncasecomp(cp, "NetPresenz", 10)) - server_type = NETPRESENZ_SERVER; - } else { - cp = response_text; + char *cp; /* look at greeting text */ + + if (strlen(response_text) > 4) { + if ((cp = strstr(response_text, " awaits your command")) || + (cp = strstr(response_text, " ready."))) { + *cp = '\0'; } - StrAllocCopy(anchor->server, cp); + cp = response_text + 4; + if (!strncasecomp(cp, "NetPresenz", 10)) + server_type = NETPRESENZ_SERVER; + } else { + cp = response_text; } + StrAllocCopy(anchor->server, cp); + if (username && *username) { command = (char*)malloc(10+strlen(username)+2+1); if (command == NULL) @@ -2232,7 +2232,7 @@ PRIVATE EntryInfo * parse_dir_entry ARGS2( return(entry_info); } /* parse_dir_entry */ -PUBLIC int compare_EntryInfo_structs ARGS2( +PRIVATE int compare_EntryInfo_structs ARGS2( EntryInfo *, entry1, EntryInfo *, entry2) { diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index 45bd6464..6ef0fde1 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -873,16 +873,6 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( *cp4 = '\0'; cp4 = cp3; chndl = UCGetLYhndl_byMIME(cp3); - if (chndl < 0) { - if (!strcmp(cp4, "cn-big5")) { - cp4 += 3; - chndl = UCGetLYhndl_byMIME(cp4); - } else if (!strncmp(cp4, "cn-gb", 5)) { - StrAllocCopy(cp3, "gb2312"); - cp4 = cp3; - chndl = UCGetLYhndl_byMIME(cp4); - } - } if (UCCanTranslateFromTo(chndl, current_char_set)) { chartrans_ok = YES; *cp1 = '\0'; @@ -1010,7 +1000,7 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( HTAlert(anchor->charset); } else if (!strncmp(cp2, "koi8-r", 6) && !strncmp(LYchar_set_names[current_char_set], - "KOI8-R character set", 20)) { + "KOI8-R Cyrillic", 15)) { *cp1 = '\0'; format = HTAtom_for(cp); StrAllocCopy(anchor->charset, "koi8-r"); @@ -2446,7 +2436,7 @@ PUBLIC int HTLoadFile ARGS4( return status; } /* If succesfull open */ FREE(localname); - } /* scope of fp */ + } /* scope of fp */ } /* local unix file system */ #endif /* !NO_UNIX_IO */ #endif /* VMS */ @@ -2487,8 +2477,6 @@ PUBLIC int HTLoadFile ARGS4( return HTLoadError(sink, 403, "Can't access requested file."); } - - } /* diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c index ecd61ac2..b694bb05 100644 --- a/WWW/Library/Implementation/HTMIME.c +++ b/WWW/Library/Implementation/HTMIME.c @@ -377,16 +377,6 @@ PRIVATE void HTMIME_put_character ARGS2( *cp4 = '\0'; cp4 = cp3; chndl = UCGetLYhndl_byMIME(cp3); - if (chndl < 0) { - if (!strcmp(cp4, "cn-big5")) { - cp4 += 3; - chndl = UCGetLYhndl_byMIME(cp4); - } else if (!strncmp(cp4, "cn-gb", 5)) { - StrAllocCopy(cp3, "gb2312"); - cp4 = cp3; - chndl = UCGetLYhndl_byMIME(cp4); - } - } if (UCCanTranslateFromTo(chndl, current_char_set)) { chartrans_ok = YES; @@ -535,7 +525,7 @@ PRIVATE void HTMIME_put_character ARGS2( } else if (!strncmp(cp2, "koi8-r", 6) && !strncmp(LYchar_set_names[current_char_set], - "KOI8-R character set", 20)) { + "KOI8-R Cyrillic", 15)) { *cp1 = '\0'; me->format = HTAtom_for(cp); StrAllocCopy(me->anchor->charset, diff --git a/WWW/Library/Implementation/HTML.h b/WWW/Library/Implementation/HTML.h index f1653cf1..42bf4638 100644 --- a/WWW/Library/Implementation/HTML.h +++ b/WWW/Library/Implementation/HTML.h @@ -146,12 +146,46 @@ struct _HTStructured { BOOL needBoldH; -#ifdef EXP_CHARTRANS - LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ - int UCLYhndl; /* tells us what charset we are fed */ - UCTransParams T; + /* + ** UCI and UCLYhndl give the UCInfo and charset registered for + ** the HTML parser in the node_anchor's UCStages structure. It + ** indicates what is fed to the HTML parser as the stream of character + ** data (not necessarily tags and attributes). It should currently + ** always be set to be the same as UCI and UCLhndl for the HTEXT stage + ** in the node_anchor's UCStages structure, since the HTML parser sends + ** its input character data to the output without further charset + ** translation. + */ + LYUCcharset * UCI; + int UCLYhndl; + /* + ** inUCI and inUCLYhndl indicate the UCInfo and charset which the + ** HTML parser treats at the input charset. It is normally set + ** to the UCI and UCLhndl for the SGML parser in the node_anchor's + ** UCStages structure (which may be a dummy, based on the MIME + ** parser's UCI and UCLhndl in that structure, when we are handling + ** a local file or non-http(s) gateway). It could be changed + ** temporarily by the HTML parser, for conversions of attribute + ** strings, but should be reset once done. - FM + */ + LYUCcharset * inUCI; + int inUCLYhndl; + /* + ** outUCI and outUCLYhndl indicate the UCInfo and charset which + ** the HTML parser treats as the output charset. It is normally + ** set to its own UCI and UCLhndl. It could be changed for + ** conversions of attribute strings, but should be reset once + ** done. - FM + */ + LYUCcharset * outUCI; + int outUCLYhndl; + /* + ** T holds the transformation rules for conversions of strings + ** between the input and output charsets by the HTML parser. - FM + */ + UCTransParams T; + int tag_charset; /* charset for attribute values etc. */ -#endif }; struct _HTStream { diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index cbeb35f9..e2ac932d 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -440,9 +440,8 @@ static CONST UC_entity_info extra_entities[] = { {"zcaron", 0x017e}, /* z with caron */ {"zdot", 0x017c}, /* z with dot above */ {"zeta", 0x03b6}, /* greek small letter zeta, u+03B6 ISOgrk3 -- */ - {"zwj", 8205}, /* zero width joiner */ - {"zwnj", 8204}, /* zero width non-joiner */ - + {"zwj", 8205}, /* zero width joiner */ + {"zwnj", 8204}, /* zero width non-joiner */ }; #endif /* EXP_CHARTRANS */ diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c index adc9568b..bb3481d9 100644 --- a/WWW/Library/Implementation/HTNews.c +++ b/WWW/Library/Implementation/HTNews.c @@ -1010,11 +1010,11 @@ PRIVATE int read_article NOARGS } if (full_line[0] == '.') { - if (full_line[1] < ' ') { /* End of article? */ + if ((unsigned char)full_line[1] < ' ') { /* End of article? */ done = YES; break; } - } else if (full_line[0] < ' ') { + } else if ((unsigned char)full_line[0] < ' ') { break; /* End of Header? */ } else if (match(full_line, "SUBJECT:")) { @@ -1273,7 +1273,7 @@ PRIVATE int read_article NOARGS if (TRACE) fprintf(stderr, "B %s", line); if (line[0] == '.') { - if (line[1] < ' ') { /* End of article? */ + if ((unsigned char)line[1] < ' ') { /* End of article? */ done = YES; break; } else { /* Line starts with dot */ @@ -1411,6 +1411,7 @@ PRIVATE int read_list ARGS1(char *, arg) BOOL done = NO; BOOL head = NO; BOOL tail = NO; + BOOL skip_this_line = NO, skip_rest_of_line = NO; int listing = 0; char *pattern = NULL; int len = 0; @@ -1455,7 +1456,7 @@ PRIVATE int read_list ARGS1(char *, arg) START(HTML_DLC); PUTC('\n'); while (!done) { - char ch = *p++ = NEXT_CHAR; + char ch = NEXT_CHAR; if (ch == (char)EOF) { if (interrupted_in_htgetcharacter) { interrupted_in_htgetcharacter = 0; @@ -1470,13 +1471,47 @@ PRIVATE int read_list ARGS1(char *, arg) abort_socket(); /* End of file, close socket */ FREE(pattern); return(HT_LOADED); /* End of file on response */ + } else if (skip_this_line) { + if (ch == LF) { + skip_this_line = skip_rest_of_line = NO; + p = line; + } + continue; + } else if (skip_rest_of_line) { + if (ch != LF) { + continue; + } + } else if (p == &line[LINE_LENGTH]) { + if (TRACE) { + fprintf(stderr, "b %.*s%c[...]\n", (LINE_LENGTH), line, ch); + } + *p = '\0'; + if (ch == LF) { + ; /* Will be dealt with below */ + } else if (WHITE(ch)) { + ch = LF; /* May treat as line without description */ + skip_this_line = YES; /* ...and ignore until LF */ + } else if (strchr(line, ' ') == NULL && + strchr(line, '\t') == NULL) { + /* No separator found */ + if (TRACE) + fprintf(stderr, + "HTNews..... group name too long, discarding.\n"); + skip_this_line = YES; /* ignore whole line */ + continue; + } else { + skip_rest_of_line = YES; /* skip until ch == LF found */ + } + } else { + *p++ = ch; } - if ((ch == LF) || (p == &line[LINE_LENGTH])) { - *p++ = '\0'; /* Terminate the string */ + if (ch == LF) { + skip_rest_of_line = NO; /* done, reset flag */ + *p = '\0'; /* Terminate the string */ if (TRACE) fprintf(stderr, "B %s", line); if (line[0] == '.') { - if ((unsigned char)line[1] < ' ') { /* End of article? */ + if ((unsigned char)line[1] < ' ') { /* End of list? */ done = YES; break; } else { /* Line starts with dot */ @@ -1670,11 +1705,11 @@ PRIVATE int read_group ARGS3( return(HT_LOADED); /* End of file on response */ } if ((ch == '\n') || (p == &line[LINE_LENGTH])) { - *p++ = '\0'; /* Terminate the string */ + *p = '\0'; /* Terminate the string */ if (TRACE) fprintf(stderr, "X %s", line); if (line[0] == '.') { - if (line[1] < ' ') { /* End of article? */ + if (line[1] < ' ') { /* End of response? */ done = YES; break; } else { /* Line starts with dot */ @@ -1782,7 +1817,7 @@ PRIVATE int read_group ARGS3( switch(line[0]) { case '.': - done = (line[1] < ' '); /* End of article? */ + done = ((unsigned char)line[1] < ' '); /* End of response? */ break; case 'S': diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index 6f558ef5..ab5149b0 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -65,7 +65,7 @@ PRIVATE void scan ARGS2( char * p; #ifdef NOTDEFINED int length = strlen(name); -#endif +#endif /* NOTDEFINED */ parts->access = NULL; parts->host = NULL; @@ -89,7 +89,7 @@ PRIVATE void scan ARGS2( } #ifdef NOTDEFINED - for (p = (name + length-1); p >= name; p--) { + for (p = (name + length-1); p >= name; p--) {} #endif /* NOTDEFINED */ /* ** Scan left-to-right for a fragment (anchor). @@ -699,7 +699,10 @@ PUBLIC char * HTUnEscape ARGS1( while (*p != '\0') { if (*p == HEX_ESCAPE && - p[1] && p[2] && /* tests shouldn't be needed, but.. */ + /* + * Tests shouldn't be needed, but better safe than sorry. + */ + p[1] && p[2] && isxdigit((unsigned char)p[1]) && isxdigit((unsigned char)p[2])) { p++; diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h index 63c84739..2f77f079 100644 --- a/WWW/Library/Implementation/HTParse.h +++ b/WWW/Library/Implementation/HTParse.h @@ -154,9 +154,11 @@ extern char * HTUnEscapeSome PARAMS(( char * str, CONST char * do_trans)); -/* Turn a string which is not a RFC 822 token into a quoted-string - kw */ +/* +** Turn a string which is not a RFC 822 token into a quoted-string. - KW +*/ extern void HTMake822Word PARAMS(( - char ** str)); + char ** str)); #endif /* HTPARSE_H */ diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index c95f55a8..277a6ebf 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -8,6 +8,7 @@ ** strings written must be less than buffer size. */ #include "HTUtils.h" +#include "tcp.h" #include "HTPlain.h" @@ -32,10 +33,8 @@ extern HTStyleSheet * styleSheet; extern int current_char_set; extern CONST char * LYchar_set_names[]; extern CONST char **LYCharSets[]; -#ifdef EXP_CHARTRANS extern int LYlowest_eightbit[]; extern BOOLEAN LYRawMode; -#endif /* EXP_CHARTRANS */ extern CONST char * HTMLGetEntityName PARAMS((int i)); extern BOOL HTPassEightBitRaw; extern BOOL HTPassHighCtrlRaw; @@ -47,24 +46,19 @@ PUBLIC int HTPlain_lastraw = -1; ** ----------- */ struct _HTStream { - CONST HTStreamClass * isa; - - HText * text; -#ifdef EXP_CHARTRANS + CONST HTStreamClass * isa; + HText * text; LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ int in_char_set; /* tells us what charset we are fed */ - int htext_char_set; /* what charset feed to HText */ - char utf_count; - long utf_char; - char utf_buf[7]; - char * utf_buf_p; - UCTransParams T; -#endif /* EXP_CHARTRANS */ + int htext_char_set; /* what charset we feed to HText */ + char utf_count; + UCode_t utf_char; + char utf_buf[8]; + char * utf_buf_p; + UCTransParams T; }; -#ifdef EXP_CHARTRANS - -PRIVATE char replace_buf [61]; /* buffer for replacement strings */ +PRIVATE char replace_buf [64]; /* buffer for replacement strings */ PRIVATE void HTPlain_getChartransInfo ARGS2( HTStream *, me, @@ -88,7 +82,6 @@ PRIVATE void HTPlain_getChartransInfo ARGS2( } me->UCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); } -#endif /* EXP_CHARTRANS */ /* Write the buffer out to the socket ** ---------------------------------- @@ -140,13 +133,14 @@ PRIVATE void HTPlain_put_character ARGS2( HTPlain_lastraw = c; if (c == '\r') { HText_appendCharacter(me->text, '\n'); -#ifdef EXP_CHARTRANS - /* for now don't repeat everything here that has been done below - kw*/ - } else if ((unsigned char)c >= 127) { - HTPlain_write(me, &c, 1); -#endif } else if (HTCJK != NOCJK) { HText_appendCharacter(me->text, c); + } else if ((unsigned char)c >= 127) { + /* + ** For now, don't repeat everything here + ** that has been done below - KW + */ + HTPlain_write(me, &c, 1); } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 && HTPassHighCtrlRaw) { HText_appendCharacter(me->text, c); @@ -159,7 +153,7 @@ PRIVATE void HTPlain_put_character ARGS2( HText_appendCharacter(me->text, c); } else if ((unsigned char)c > 160) { if (!HTPassEightBitRaw && - strncmp(LYchar_set_names[current_char_set], "ISO Latin 1", 11)) { + current_char_set != 0) { int len, high, low, i, diff = 1; CONST char * name; int value = (int)((unsigned char)c - 160); @@ -217,11 +211,10 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) { CONST char * p; CONST char * e = s+l; -#ifdef EXP_CHARTRANS BOOL chk; - long unsign_c, uck; + UCode_t code; + long uck; char c_p; -#endif /* EXP_CHARTRANS */ for (p = s; p < e; p++) { #ifdef REMOVE_CR_ONLY @@ -247,8 +240,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HText_appendCharacter(me->text, '\n'); continue; } -#ifdef EXP_CHARTRANS - unsign_c = (unsigned char)(*p); + code = (unsigned char)*p; c_p = *p; if (me->T.decode_utf8) { @@ -257,20 +249,31 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** Incomplete characters silently ignored. ** from Linux kernel's console.c */ - if ((unsigned char)(*p) > 0x7f) { + if ((unsigned char)(*p) > 127) { if (me->utf_count > 0 && (*p & 0xc0) == 0x80) { me->utf_char = (me->utf_char << 6) | (*p & 0x3f); me->utf_count--; - *(me->utf_buf_p++) = *p; + *(me->utf_buf_p) = *p; + (me->utf_buf_p)++; if (me->utf_count == 0) { + /* + ** Got a complete multibyte character. + */ *(me->utf_buf_p) = '\0'; - unsign_c = me->utf_char; - if (unsign_c<256) c_p = (char)unsign_c; + code = me->utf_char; + if (code < 256) { + c_p = FROMASCII((char)code); + } + } else { + continue; /* iterate for more */ } - else continue; /* iterate for more */ } else { + /* + ** Accumulate a multibyte character. + */ me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p++) = *p; + *(me->utf_buf_p) = *p; + (me->utf_buf_p)++; if ((*p & 0xe0) == 0xc0) { me->utf_count = 1; me->utf_char = (*p & 0x1f); @@ -286,14 +289,20 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) } else if ((*p & 0xfe) == 0xfc) { me->utf_count = 5; me->utf_char = (*p & 0x01); - } else { /* garbage */ + } else { + /* + * Garbage. + */ me->utf_count = 0; me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; } continue; /* iterate for more */ } - } else { /* got an ASCII char */ + } else { + /* + ** Got an ASCII character. + */ me->utf_count = 0; me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; @@ -301,25 +310,21 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) } if (me->T.trans_to_uni && - (unsign_c >= 127 || - (unsign_c < 32 && unsign_c != 0 && me->T.trans_C0_to_uni))) { - unsign_c = UCTransToUni(c_p, me->in_char_set); - if (unsign_c > 0) { - if (unsign_c < 256) { - c_p = (char)unsign_c; + (code >= 127 || + (code < 32 && code != 0 && me->T.trans_C0_to_uni))) { + code = UCTransToUni(c_p, me->in_char_set); + if (code > 0) { + if (code < 256) { + c_p = FROMASCII((char)code); } } } /* - ** At this point we have either unsign_c in Unicode - ** (and c in latin1 if unsign_c is in the latin1 range), - ** or unsign_c and c will have to be passed raw. + ** At this point we have either code in Unicode + ** (and c in latin1 if code is in the latin1 range), + ** or code and c will have to be passed raw. */ -#else -#define unsign_c (unsigned char)*p -#define c_p *p -#endif /* EXP_CHARTRANS */ /* ** If CJK mode is on, we'll assume the document matches ** the user's selected character set, and if not, the @@ -334,7 +339,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) #define PASSHI8BIT HTPassEightBitRaw #else #define PASSHICTRL (me->T.transp || \ - unsign_c >= LYlowest_eightbit[me->in_char_set]) + code >= LYlowest_eightbit[me->in_char_set]) #define PASS8859SPECL me->T.pass_160_173_raw #define PASSHI8BIT (HTPassEightBitRaw || \ (me->T.do_8bitraw && !me->T.trans_from_uni)) @@ -345,47 +350,75 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** document matches and pass 127-160 8-bit characters. If it ** doesn't match, the user should toggle raw/CJK mode off. - FM */ - } else if (unsign_c >= 127 && unsign_c < 161 && + } else if (code >= 127 && code < 161 && PASSHICTRL && PASS8859SPECL) { HText_appendCharacter(me->text, *p); - } else if (unsign_c == 173 && PASS8859SPECL) { + } else if (code == 173 && PASS8859SPECL) { HText_appendCharacter(me->text, *p); /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and treat 160 (nbsp) as an ASCII space (32). - FM */ - } else if (unsign_c == 160) { + } else if (code == 160) { HText_appendCharacter(me->text, ' '); /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and ignore 173 (shy). - FM */ - } else if (unsign_c == 173) { + } else if (code == 173) { continue; /* ** If we get to here, pass the displayable ASCII characters. - FM */ - } else if ((unsign_c >= 32 && unsign_c < 127) || -#ifdef EXP_CHARTRANS - (PASSHI8BIT && c_p>=LYlowest_eightbit[me->htext_char_set])|| -#endif + } else if ((code >= 32 && code < 127) || + (PASSHI8BIT && + c_p >= LYlowest_eightbit[me->htext_char_set]) || *p == '\n' || *p == '\t') { HText_appendCharacter(me->text, c_p); -#ifdef EXP_CHARTRANS } else if (me->T.use_raw_char_in) { HText_appendCharacter(me->text, *p); +#ifdef NOTUSED_FOTEMODS + /* + ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM + */ + } else if (code == 8194 || code == 8195 || code == 8201) { + HText_appendCharacter(me->text, ' '); + /* + ** Use ASCII hyphen for 8211 (ndash/endash) + ** or 8212 (mdash/emdash). - FM + */ + } else if (code == 8211 || code == 8212) { + HText_appendCharacter(me->text, '-'); + /* + ** Ignore 8204 (zwnj) or 8205 (zwj), for now. - FM + */ + } else if (code == 8204 || code == 8205) { + if (TRACE) { + fprintf(stderr, + "HTPlain_write: Ignoring '%ld'.\n", code); + } + /* + ** Ignore 8206 (lrm) or 8207 (rlm), for now. - FM + */ + } else if (code == 8206 || code == 8207) { + if (TRACE) { + fprintf(stderr, + "HTPlain_write: Ignoring '%ld'.\n", code); + } +#endif /* NOTUSED_FOTEMODS */ + /****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ - } else if ((chk = (me->T.trans_from_uni && unsign_c >= 160)) && - (uck = UCTransUniChar(unsign_c, + } else if ((chk = (me->T.trans_from_uni && code >= 160)) && + (uck = UCTransUniChar(code, me->htext_char_set)) >= 32 && uck < 256) { if (TRACE) { fprintf(stderr, - "UCTransUniChar returned 0x%lx:'%c'.\n", - uck, (char)uck); + "UCTransUniChar returned 0x%.2lX:'%c'.\n", + uck, FROMASCII((char)uck)); } HText_appendCharacter(me->text, (char)(uck & 0xff)); } else if (chk && @@ -394,7 +427,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Not found; look for replacement string. */ - (uck = UCTransUniCharStr(replace_buf,60, unsign_c, + (uck = UCTransUniCharStr(replace_buf, 60, code, me->htext_char_set, 0) >= 0)) { /* ** No further tests for valididy - assume that whoever @@ -405,11 +438,20 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** If we get to here, and should have translated, ** translation has failed so far. */ - } else if (chk && unsign_c > 127 && me->T.output_utf8 && - *me->utf_buf) { - HText_appendText(me->text, me->utf_buf); - me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; + } else if (chk && code > 127 && me->T.output_utf8) { + /* + ** We want UTF-8 output, so do it now. - FM + */ + if (*me->utf_buf) { + HText_appendText(me->text, me->utf_buf); + me->utf_buf_p = me->utf_buf; + *(me->utf_buf_p) = '\0'; + } else if (UCConvertUniToUtf8(code, replace_buf)) { + HText_appendText(me->text, replace_buf); + } else { + sprintf(replace_buf, "U%.2lX", code); + HText_appendText(me->text, replace_buf); + } } else if (me->T.strip_raw_char_in && (unsigned char)*p >= 0xc0 && (unsigned char)*p < 255) { @@ -418,32 +460,30 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** (somewhat) readable ASCII. */ HText_appendCharacter(me->text, (char)(*p & 0x7f)); - } else if (me->T.trans_from_uni && unsign_c > 255) { + } else if (me->T.trans_from_uni && code > 255) { if (PASSHI8BIT && PASSHICTRL && LYRawMode && (unsigned char)*p >= LYlowest_eightbit[me->htext_char_set]) { HText_appendCharacter(me->text, *p); } else { - sprintf(replace_buf, "U%.2lx", unsign_c); + sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } -#endif /* EXP_CHARTRANS */ - /* ** If we get to here and HTPassEightBitRaw or the ** selected character set is not "ISO Latin 1", ** use the translation tables for 161-255 8-bit ** characters (173 was handled above). - FM */ - } else if (unsign_c > 160) { - if (!HTPassEightBitRaw && unsign_c <= 255 && - strncmp(LYchar_set_names[current_char_set], - "ISO Latin 1", 11)) { + } else if (code > 160) { + if (!HTPassEightBitRaw && code <= 255 && + me->htext_char_set != 0) { /* ** Attempt to translate. - FM */ int len, high, low, i, diff=1; CONST char * name; - int value = (int)(unsign_c - 160); + int value = (int)(code - 160); + name = HTMLGetEntityName(value); len = strlen(name); for(low = 0, high = HTML_dtd.number_of_entities; @@ -454,7 +494,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) diff = strncmp(HTML_dtd.entity_names[i], name, len); if (diff == 0) { HText_appendText(me->text, - LYCharSets[current_char_set][i]); + LYCharSets[me->htext_char_set][i]); break; } } @@ -466,7 +506,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ #ifdef EXP_CHARTRANS if (!PASSHI8BIT) - c_p = (char)unsign_c; + c_p = FROMASCII((char)code); if (me->T.output_utf8 && *me->utf_buf) { HText_appendText(me->text, me->utf_buf); @@ -474,7 +514,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) *(me->utf_buf_p) = '\0'; } else if (me->T.trans_from_uni) { - sprintf(replace_buf,"U%.2lx",unsign_c); + sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } else #endif /* EXP_CHARTRANS */ @@ -484,15 +524,12 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Didn't attempt a translation. - FM */ -#ifdef EXP_CHARTRANS - /* either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. - */ - if (unsign_c <= 255 && !PASSHI8BIT) - c_p = (char)unsign_c; - if (unsign_c > 127 && - me->T.output_utf8 && - *me->utf_buf) { + /* Either output as UTF8 or a hex representation or + ** pass the raw character and hope it's OK. + */ + if (code <= 255 && !PASSHI8BIT) + c_p = FROMASCII((char)code); + if (code > 127 && me->T.output_utf8 && *me->utf_buf) { HText_appendText(me->text, me->utf_buf); me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; @@ -503,11 +540,10 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) (unsigned char)c_p >= LYlowest_eightbit[me->htext_char_set]) { HText_appendCharacter(me->text, c_p); - } else if (me->T.trans_from_uni && unsign_c >= 127) { - sprintf(replace_buf,"U%.2lx",unsign_c); + } else if (me->T.trans_from_uni && code >= 127) { + sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } else -#endif /* EXP_CHARTRANS */ HText_appendCharacter(me->text, c_p); } } @@ -541,7 +577,7 @@ PRIVATE void HTPlain_abort ARGS2( */ PUBLIC CONST HTStreamClass HTPlain = { - "SocketWriter", + "PlainPresenter", HTPlain_free, HTPlain_abort, HTPlain_put_character, HTPlain_put_string, HTPlain_write, @@ -566,7 +602,7 @@ PUBLIC HTStream* HTPlainPresent ARGS3( #ifdef EXP_CHARTRANS me->utf_count = 0; me->utf_char = 0; - me->utf_buf[0] = me->utf_buf[6] = '\0'; + me->utf_buf[0] = me->utf_buf[6] =me->utf_buf[7] = '\0'; me->utf_buf_p = me->utf_buf; me->htext_char_set = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT); diff --git a/WWW/Library/Implementation/HTString.c b/WWW/Library/Implementation/HTString.c index 3f3c9f4b..2ffa3e51 100644 --- a/WWW/Library/Implementation/HTString.c +++ b/WWW/Library/Implementation/HTString.c @@ -180,8 +180,11 @@ PUBLIC char * HTNextField ARGS1( ** found points to the delimiter found unless it was NULL. ** Returns a pointer to the first word or NULL on error */ -PUBLIC char * HTNextTok (char ** pstr, - const char * delims, const char * bracks, char * found) +PUBLIC char * HTNextTok ARGS4( + char **, pstr, + const char *, delims, + const char *, bracks, + char *, found) { char * p = *pstr; char * start = NULL; diff --git a/WWW/Library/Implementation/HTString.h b/WWW/Library/Implementation/HTString.h index ffc79e64..fb2d3397 100644 --- a/WWW/Library/Implementation/HTString.h +++ b/WWW/Library/Implementation/HTString.h @@ -43,8 +43,8 @@ Next word or quoted string extern char * HTNextField PARAMS ((char** pstr)); /* A more general parser - kw */ -extern char * HTNextTok (char ** pstr, - const char * delims, const char * bracks, char * found); +extern char * HTNextTok PARAMS((char ** pstr, + const char * delims, const char * bracks, char * found)); #endif /* diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c index 2fc9145b..595f39bd 100644 --- a/WWW/Library/Implementation/HTTP.c +++ b/WWW/Library/Implementation/HTTP.c @@ -364,10 +364,20 @@ try_again: } if (!(LYUserSpecifiedURL || - LYNoRefererHeader || LYNoRefererForThis) && - strcmp((char *)HTLoadedDocumentURL(), "")) { + LYNoRefererHeader || LYNoRefererForThis) && + strcmp(HTLoadedDocumentURL(), "")) { + char *cp = HTLoadedDocumentURL(); StrAllocCat(command, "Referer: "); - StrAllocCat(command, (char *)HTLoadedDocumentURL()); + if (!strncasecomp(cp, "LYNXIMGMAP:", 11)) { + char *cp1 = strchr(cp, '#'); + if (cp1) + *cp1 = '\0'; + StrAllocCat(command, cp + 11); + if (cp1) + *cp1 = '#'; + } else { + StrAllocCat(command, cp); + } sprintf(line, "%c%c", CR, LF); StrAllocCat(command, line); } @@ -653,7 +663,7 @@ try_again: BOOL end_of_file = NO; int buffer_length = INIT_LINE_SIZE; - line_buffer = (char *) calloc(1, buffer_length * sizeof(char)); + line_buffer = (char *)calloc(1, (buffer_length * sizeof(char))); do {/* Loop to read in the first line */ /* @@ -662,7 +672,7 @@ try_again: if (buffer_length - length < LINE_EXTEND_THRESH) { buffer_length = buffer_length + buffer_length; line_buffer = - (char *) realloc(line_buffer, buffer_length * sizeof(char)); + (char *)realloc(line_buffer, (buffer_length * sizeof(char))); } if (TRACE) fprintf (stderr, "HTTP: Trying to read %d\n", @@ -672,8 +682,10 @@ try_again: if (TRACE) fprintf (stderr, "HTTP: Read %d\n", status); if (status <= 0) { - /* Retry if we get nothing back too; - bomb out if we get nothing twice. */ + /* + * Retry if we get nothing back too. + * Bomb out if we get nothing twice. + */ if (status == HT_INTERRUPTED) { if (TRACE) fprintf (stderr, "HTTP: Interrupted initial read.\n"); @@ -728,7 +740,7 @@ try_again: if (line_buffer) { FREE(line_kept_clean); - line_kept_clean = (char *)malloc (buffer_length * sizeof (char)); + line_kept_clean = (char *)malloc(buffer_length * sizeof(char)); memcpy(line_kept_clean, line_buffer, buffer_length); } @@ -924,8 +936,9 @@ try_again: * No Content. */ HTAlert(line_buffer); + HTTP_NETCLOSE(s, handle); status = HT_NO_DATA; - goto done; + goto clean_up; break; case 205: @@ -937,8 +950,9 @@ try_again: * document. - FM */ HTAlert("Request fulfilled. Reset Content."); + HTTP_NETCLOSE(s, handle); status = HT_NO_DATA; - goto done; + goto clean_up; break; case 206: @@ -951,7 +965,7 @@ try_again: HTAlert(line_buffer); HTTP_NETCLOSE(s, handle); status = HT_NO_DATA; - goto done; + goto clean_up; break; default: diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 6e64714f..4af1fab9 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -44,6 +44,8 @@ PUBLIC BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ PUBLIC BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */ PUBLIC BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ +extern UCode_t HTMLGetEntityUCValue PARAMS((CONST char *name)); +extern int LYlowest_eightbit[]; /* The State (context) of the parser ** @@ -79,6 +81,7 @@ struct _HTStream { HTTag *current_tag; CONST HTTag *unknown_tag; + BOOL inSELECT; int current_attribute_number; HTChunk *string; HTElement *element_stack; @@ -109,7 +112,6 @@ struct _HTStream { BOOL second_bracket; BOOL isHex; -#ifdef EXP_CHARTRANS HTParentAnchor * node_anchor; LYUCcharset * UCI; /* pointer to anchor UCInfo */ int in_char_set; /* charset we are fed */ @@ -121,7 +123,6 @@ struct _HTStream { char * utf_buf_p; UCTransParams T; int current_tag_charset; /* charset to pass attributes */ -#endif /* EXP_CHARTRANS */ char * recover; int recover_index; @@ -168,7 +169,8 @@ PRIVATE void set_chartrans_handling ARGS3( } else if (context->T.do_8bitraw || context->T.use_raw_char_in) { context->current_tag_charset = context->in_char_set; - } else if (context->T.trans_from_uni || context->T.output_utf8) { + } else if (context->T.output_utf8 || + context->T.trans_from_uni) { context->current_tag_charset = UCGetLYhndl_byMIME("unicode-1-1-utf-8"); } else { context->current_tag_charset = 0; @@ -321,6 +323,23 @@ PRIVATE BOOL put_special_unicodes ARGS2( ** Use ASCII hyphen for ndash/endash or mdash/emdash. */ PUTC('-'); +#ifdef NOTUSED_FOTEMODS + } else if (code == 8204 || code == 8205) { + /* + ** Ignore zwnj or zwj, for now. Note that zwnj may have + ** been handled as <WBR> by the calling function. - FM + */ + if (TRACE) { + fprintf(stderr, "put_special_unicodes: Ignoring '%ld'.\n", code); + } + } else if (code == 8206 || code == 8207) { + /* + ** Ignore lrm or rlm, for now. + */ + if (TRACE) { + fprintf(stderr, "put_special_unicodes: Ignoring '%ld'.\n", code); + } +#endif /* NOTUSED_FOTEMODS */ } else { /* ** Return NO if nothing done. @@ -348,28 +367,31 @@ PRIVATE BOOL put_special_unicodes ARGS2( ** ** Modified more (for use with Lynx character translation code): */ - -#ifdef EXP_CHARTRANS PRIVATE char replace_buf [64]; /* buffer for replacement strings */ -#endif - PRIVATE BOOL FoundEntity = FALSE; +#define IncludesLatin1Enc(cs) \ + (cs == 0 || \ + (context->htmlUCI && \ + (context->htmlUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) + PRIVATE void handle_entity ARGS2( HTStream *, context, char, term) { CONST char ** entities = context->dtd->entity_names; +#ifdef NOTDEFINED CONST UC_entity_info * extra_entities = context->dtd->extra_entity_info; - extern int current_char_set; int rc; +#endif /* NOTDEFINED */ + UCode_t code; + long uck; CONST char *s = context->string->data; int high, low, i, diff; /* - ** Use Lynx special characters directly for nbsp, ensp, emsp, - ** thinsp, and shy so we go through the HTML_put_character() - ** filters instead of using HTML_put_string(). - FM + ** Use Lynx special characters for nbsp (160), ensp (8194), + ** emsp (8195), thinsp (8201), and shy (173). - FM */ if (!strcmp(s, "nbsp")) { PUTC(HT_NON_BREAK_SPACE); @@ -387,13 +409,122 @@ PRIVATE void handle_entity ARGS2( return; } +#ifdef NOTUSED_FOTEMODS + /* + ** For ndash or endash (8211), and mdash or emdash (8212), + ** use an ASCII hyphen (32). - FM + */ + if (!strcmp(s, "ndash") || + !strcmp(s, "endash") || + !strcmp(s, "mdash") || + !strcmp(s, "endash")) { + PUTC('-'); + FoundEntity = TRUE; + return; + } + + /* + ** Ignore zwnj (8204) and zwj (8205), for now. + ** Note that zwnj may have been handled as <WBR> + ** by the calling function. - FM + */ + if (!strcmp(s, "zwnj") || + !strcmp(s, "zwnj")) { + if (TRACE) { + fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s); + } + FoundEntity = TRUE; + return; + } + + /* + ** Ignore lrm (8206), and rln (8207), for now. - FM + */ + if (!strcmp(s, "lrm") || + !strcmp(s, "rlm")) { + if (TRACE) { + fprintf(stderr, "handle_entity: Ignoring '%s'.\n", s); + } + FoundEntity = TRUE; + return; + } +#endif /* NOTUSED_FOTEMODS */ + /* ** Handle all other entities normally. - FM */ FoundEntity = FALSE; + if ((code = HTMLGetEntityUCValue(s)) != 0) { + /* + ** We got a Unicode value for the entity name. + ** Check for special Unicodes. - FM + */ + if (put_special_unicodes(context, code)) { + FoundEntity = TRUE; + return; + } + /* + ** Seek a translation from the chartrans tables. + */ + if ((uck = UCTransUniChar(code, context->html_char_set)) >= 32 && + uck < 256 && + (uck < 127 || + uck >= LYlowest_eightbit[context->html_char_set])) { + if (uck == 160 && IncludesLatin1Enc(context->html_char_set)) { + /* + ** Would only happen if some other Unicode + ** is mapped to Latin-1 160. + */ + PUTC(HT_NON_BREAK_SPACE); + } else if (uck == 173 && IncludesLatin1Enc(context->html_char_set)) { + /* + ** Would only happen if some other Unicode + ** is mapped to Latin-1 173. + */ + PUTC(LY_SOFT_HYPHEN); + } else { + PUTC(FROMASCII((char)uck)); + } + FoundEntity = TRUE; + return; + } else if ((uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + ** Not found; look for replacement string. + */ + (uck = UCTransUniCharStr(replace_buf, 60, code, + context->html_char_set, 0) >= 0)) { + CONST char *p; + for (p = replace_buf; *p; p++) + PUTC(*p); + FoundEntity = TRUE; + return; + } + /* + ** If we're displaying UTF-8, try that now. - FM + */ + if (context->T.output_utf8 && PUTUTF8(code)) { + FoundEntity = TRUE; + return; + } + /* + ** If it's safe ASCII, use it. - FM + */ + if (code >= 32 && code < 127) { + PUTC(FROMASCII((char)code)); + FoundEntity = TRUE; + return; + } + } + + /* + ** We haven't succeeded yet, so try the old LYCharSets + ** arrays for translation strings. - FM + */ for (low = 0, high = context->dtd->number_of_entities; high > low; - diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */ + diff < 0 ? (low = i+1) : (high = i)) { /* Binary search */ i = (low + (high-low)/2); diff = strcmp(entities[i], s); /* Case sensitive! */ if (diff == 0) { /* success: found it */ @@ -402,38 +533,43 @@ PRIVATE void handle_entity ARGS2( return; } } -#ifdef EXP_CHARTRANS + +#ifdef NOTDEFINED /* repeat for extra entities if not found... hack... -kw */ if (TRACE) fprintf(stderr, "SGML: Unknown entity %s so far, checking extra...\n", s); for (low = 0, high = context->dtd->number_of_extra_entities; high > low; - diff < 0 ? (low = i+1) : (high = i)) { /* Binary serach */ - i = (low + (high-low)/2); - diff = strcmp(extra_entities[i].name, s); /* Case sensitive! */ - if (diff==0) { /* success: found it */ - if (put_special_unicodes(context, extra_entities[i].code)) { - FoundEntity = TRUE; - return; - } else if (context->T.output_utf8 && - PUTUTF8(extra_entities[i].code)) { - FoundEntity = TRUE; - return; - } + diff < 0 ? (low = i+1) : (high = i)) { /* Binary search */ + i = (low + (high - low)/2); + diff = strcmp(extra_entities[i].name, s); /* Case sensitive! */ + if (diff == 0) { /* success: found it */ + if (put_special_unicodes(context, extra_entities[i].code)) { + FoundEntity = TRUE; + return; + } else if (context->T.output_utf8 && + PUTUTF8(extra_entities[i].code)) { + FoundEntity = TRUE; + return; + } if ((rc = UCTransUniChar(extra_entities[i].code, - current_char_set)) > 0) { + context->html_char_set)) > 0 && + rc < 256) { /* - * Could do further checks here... - KW - */ - PUTC(rc); - FoundEntity = TRUE; - return; + ** Could do further checks here... - KW + */ + PUTC(FROMASCII((char)rc)); + FoundEntity = TRUE; + return; } else if ((rc == -4) && - /* Not found; look for replacement string */ + /* + ** Not found; look for replacement string. + */ (rc = UCTransUniCharStr(replace_buf, 60, extra_entities[i].code, - current_char_set, 0) >= 0)) { + context->html_char_set, + 0) >= 0)) { CONST char *p; for (p = replace_buf; *p; p++) PUTC(*p); @@ -442,13 +578,14 @@ PRIVATE void handle_entity ARGS2( } rc = (*context->actions->put_entity)(context->target, i+context->dtd->number_of_entities); - if (rc != HT_CANNOT_TRANSLATE) { - FoundEntity = TRUE; - return; - } + if (rc != HT_CANNOT_TRANSLATE) { + FoundEntity = TRUE; + return; + } } } -#endif +#endif /* NOTDEFINED */ + /* ** If entity string not found, display as text. */ @@ -625,6 +762,9 @@ PRIVATE void do_close_stacked ARGS1( HTElement * stacked = context->element_stack; if (!stacked) return; /* stack was empty */ + if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { + context->inSELECT = FALSE; + } (*context->actions->end_element)( context->target, stacked->tag - context->dtd->tags, @@ -635,8 +775,8 @@ PRIVATE void do_close_stacked ARGS1( PRIVATE int is_on_stack ARGS2( HTStream *, context, HTTag *, old_tag) -{ - HTElement * stacked = context->element_stack; +{ + HTElement * stacked = context->element_stack; int i = 1; for (; stacked; stacked = stacked->next, i++) { if (stacked->tag == old_tag) @@ -700,10 +840,36 @@ PRIVATE void end_element ARGS2( return; } } - /* Now let the old code deal with the rest... - kw */ + /* Now let the non-extended code deal with the rest. - kw */ #endif /* EXTENDED_HTMLDTD */ + /* + ** If we are in a SELECT block, ignore anything + ** but a SELECT end tag. - FM + */ + if (context->inSELECT) { + if (!strcasecomp(old_tag->name, "SELECT")) { + /* + ** Turn off the inSELECT flag and fall through. - FM + */ + context->inSELECT = FALSE; + } else { + /* + ** Ignore the end tag. - FM + */ + if (TRACE) { + fprintf(stderr, + "SGML: Ignoring end tag </%s> in SELECT block.\n", + old_tag->name); + } + return; + } + } + + /* + ** Handle the end tag. - FM + */ if (TRACE) fprintf(stderr, "SGML: End </%s>\n", old_tag->name); if (old_tag->contents == SGML_EMPTY) { @@ -832,10 +998,67 @@ PRIVATE void start_element ARGS1( new_tag->name); } } - /* fall through to the old code - kw */ + /* Fall through to the non-extended code - kw */ #endif /* EXTENDED_HTMLDTD */ + /* + ** If we are not in a SELECT block, check if this is + ** a SELECT start tag. Otherwise (i.e., we are in a + ** SELECT block) accept only OPTION as valid, terminate + ** the SELECT block if it is any other form-related + ** element, and otherwise ignore it. - FM + */ + if (!context->inSELECT) { + /* + ** We are not in a SELECT block, so check if this starts one. - FM + */ + if (!strcasecomp(new_tag->name, "SELECT")) { + /* + ** Set the inSELECT flag and fall through. - FM + */ + context->inSELECT = TRUE; + } + } else { + /* + ** We are in a SELECT block. - FM + */ + if (strcasecomp(new_tag->name, "OPTION")) { + /* + ** Ugh, it is not an OPTION. - FM + */ + if (!strcasecomp(new_tag->name, "INPUT") || + !strcasecomp(new_tag->name, "TEXTAREA") || + !strcasecomp(new_tag->name, "SELECT") || + !strcasecomp(new_tag->name, "BUTTON") || + !strcasecomp(new_tag->name, "FIELDSET") || + !strcasecomp(new_tag->name, "LABEL") || + !strcasecomp(new_tag->name, "LEGEND") || + !strcasecomp(new_tag->name, "FORM")) { + /* + ** It is another form-related start tag, so terminate + ** the current SELECT block and fall through. - FM + */ + if (TRACE) + fprintf(stderr, + "SGML: Faking SELECT end tag before <%s> start tag.\n", + new_tag->name); + end_element(context, SGMLFindTag(context->dtd, "SELECT")); + } else { + /* + ** Ignore the start tag. - FM + */ + if (TRACE) + fprintf(stderr, + "SGML: Ignoring start tag <%s> in SELECT block.\n", + new_tag->name); + return; + } + } + } + /* + ** Handle the start tag. - FM + */ if (TRACE) fprintf(stderr, "SGML: Start <%s>\n", new_tag->name); (*context->actions->start_element)( @@ -1016,11 +1239,7 @@ PUBLIC void SGML_character ARGS2( CONST SGML_dtd *dtd = context->dtd; HTChunk *string = context->string; CONST char * EntityName; - extern int current_char_set; extern CONST char * HTMLGetEntityName PARAMS((int i)); - -#ifdef EXP_CHARTRANS - extern int LYlowest_eightbit[]; char * p; BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ UCode_t clong, uck; /* Enough bits for UCS4 ... */ @@ -1033,12 +1252,6 @@ PUBLIC void SGML_character ARGS2( */ #define unsign_c clong -#else -#define c c_in -#define unsign_c (unsigned char)c -#endif - -#ifdef EXP_CHARTRANS c = c_in; clong = (unsigned char)c; /* a.k.a. unsign_c */ @@ -1119,7 +1332,7 @@ PUBLIC void SGML_character ARGS2( if (clong > 0) { saved_char_in = c; if (clong < 256) { - c = (char)clong; + c = FROMASCII((char)clong); } } goto top1; @@ -1134,7 +1347,7 @@ PUBLIC void SGML_character ARGS2( (clong = UCTransToUni(c, context->in_char_set)) > 0))) { saved_char_in = c; if (clong < 256) { - c = (char)clong; + c = FROMASCII((char)clong); } goto top1; } else { @@ -1169,13 +1382,11 @@ PUBLIC void SGML_character ARGS2( goto top0a; } - /* At this point we have either unsign_c a.k.a. clong in Unicode - (and c in latin1 if clong is in the latin1 range), - or unsign_c and c will have to be passed raw. */ - -#endif /* EXP_CHARTRANS */ - - + /* + ** At this point we have either unsign_c a.k.a. clong in + ** Unicode (and c in latin1 if clong is in the latin1 range), + ** or unsign_c and c will have to be passed raw. - KW + */ top: #ifdef EXP_CHARTRANS saved_char_in = '\0'; @@ -1283,7 +1494,6 @@ top1: !(PASS8859SPECL || HTCJK != NOCJK)) { PUTC(LY_SOFT_HYPHEN); -#ifdef EXP_CHARTRANS } else if (context->T.use_raw_char_in && saved_char_in) { /* ** Only if the original character is still in saved_char_in, @@ -1300,8 +1510,8 @@ top1: uck < 256) { if (TRACE) { fprintf(stderr, - "UCTransUniChar returned 0x%lx:'%c'.\n", - uck, (char)uck); + "UCTransUniChar returned 0x%.2lX:'%c'.\n", + uck, FROMASCII((char)uck)); } c = (char)(uck & 0xff); PUTC(c); @@ -1325,8 +1535,6 @@ top1: */ } else if (context->T.output_utf8 && PUTUTF8(clong)) { ; /* do nothing more */ -#endif /* EXP_CHARTRANS */ - /* ** If it's any other (> 160) 8-bit chararcter, and ** we have not set HTPassEightBitRaw nor HTCJK, nor @@ -1337,14 +1545,10 @@ top1: #define PASSHI8BIT HTPassEightBitRaw #else #define PASSHI8BIT (HTPassEightBitRaw || (context->T.do_8bitraw && !context->T.trans_from_uni)) -#define IncludesLatin1Enc(cs) \ - (cs == 0 || \ - (context->htmlUCI && \ - (context->htmlUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) #endif /* EXP_CHARTRANS */ } else if (unsign_c > 160 && unsign_c < 256 && !(PASSHI8BIT || HTCJK != NOCJK) && - !IncludesLatin1Enc(current_char_set)) { + !IncludesLatin1Enc(context->html_char_set)) { int i; int value; @@ -1514,7 +1718,7 @@ top1: ** Check for a numeric entity. */ case S_cro: - if (unsign_c < 127 && (unsigned char)c == 'x') { + if (unsign_c < 127 && TOLOWER((unsigned char)c) == 'x') { context->isHex = TRUE; context->state = S_incro; } else if (unsign_c < 127 && isdigit((unsigned char)c)) { @@ -1628,17 +1832,20 @@ top1: /* * Seek a translation from the chartrans tables. */ - if ((uck = UCTransUniChar(value,current_char_set)) >= 32 && + if ((uck = UCTransUniChar(value, + context->html_char_set)) >= 32 && uck < 256 && (uck < 127 || uck >= LYlowest_eightbit[context->html_char_set])) { - if (uck == 160 && current_char_set == 0) { + if (uck == 160 && + IncludesLatin1Enc(context->html_char_set)) { /* ** Would only happen if some other Unicode ** is mapped to Latin-1 160. */ PUTC(HT_NON_BREAK_SPACE); - } else if (uck == 173 && current_char_set == 0) { + } else if (uck == 173 && + IncludesLatin1Enc(context->html_char_set)) { /* ** Would only happen if some other Unicode ** is mapped to Latin-1 173. @@ -1653,8 +1860,9 @@ top1: /* ** Not found; look for replacement string. */ - (uck = UCTransUniCharStr(replace_buf,60,value, - current_char_set, 0) >= 0 ) ) { + (uck = UCTransUniCharStr(replace_buf, 60, value, + context->html_char_set, + 0) >= 0)) { for (p = replace_buf; *p; p++) { PUTC(*p); } @@ -1676,6 +1884,34 @@ top1: context->isHex = FALSE; context->state = S_entity; goto top1; +#ifdef NOTUSED_FOTEMODS + /* + ** If the value is greater than 255 and we do not + ** have the "7-bit approximations" as our output + ** character set (in which case we did it already) + ** seek a translation for that. - FM + */ + } else if ((chk = ((code > 255) && + context->html_char_set != + UCGetLYhndl_byMIME("us-ascii"))) && + (uck = UCTransUniChar(code, + UCGetLYhndl_byMIME("us-ascii"))) + >= 32 && uck < 127) { + /* + ** Got an ASCII character (yippey). - FM + */ + PUTC(((char)(uck & 0xff))); + } else if ((chk && uck == -4) && + (uck = UCTransUniCharStr(replace_buf, + 60, code, + UCGetLYhndl_byMIME("us-ascii"), + 0) >= 0)) { + /* + ** Got a replacement string (yippey). - FM + */ + for (p = replace_buf; *p; p++) + PUTC(*p); +#endif /* NOTUSED_FOTEMODS */ /* ** Show the numeric entity if we get to here ** and the value: @@ -1690,13 +1926,13 @@ top1: ** - FM */ } else if ((value > 255) || - (value < 32 && - value != 9 && value != 10 && value != 13 && - HTCJK == NOCJK) || - (value == 127 && - !(HTPassHighCtrlRaw || HTCJK != NOCJK)) || - (value > 127 && value < 160 && - !HTPassHighCtrlNum)) { + (value < 32 && + value != 9 && value != 10 && value != 13 && + HTCJK == NOCJK) || + (value == 127 && + !(HTPassHighCtrlRaw || HTCJK != NOCJK)) || + (value > 127 && value < 160 && + !HTPassHighCtrlNum)) { if (value == 8194 || value == 8195 || value == 8201) { /* ** ensp, emsp or thinsp. - FM @@ -1728,7 +1964,7 @@ top1: goto top1; } } else if (value < 161 || HTPassEightBitNum || - IncludesLatin1Enc(current_char_set)) { + IncludesLatin1Enc(context->html_char_set)) { /* ** No conversion needed. - FM */ @@ -2283,11 +2519,13 @@ top1: } else if (HTCJK == NOCJK && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && - (unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set]) + (unsigned char)saved_char_in >= + LYlowest_eightbit[context->html_char_set]) { HTChunkPutUtf8Char(string, (0xf000 | (unsigned char)saved_char_in)); - else + } else { HTChunkPutUtf8Char(string, clong); + } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); #endif /* EXP_CHARTRANS */ @@ -2318,11 +2556,13 @@ top1: } else if (HTCJK == NOCJK && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && - (unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set]) + (unsigned char)saved_char_in >= + LYlowest_eightbit[context->html_char_set]) { HTChunkPutUtf8Char(string, (0xf000 | (unsigned char)saved_char_in)); - else + } else { HTChunkPutUtf8Char(string, clong); + } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); #endif /* EXP_CHARTRANS */ @@ -2357,11 +2597,13 @@ top1: } else if (HTCJK == NOCJK && (context->T.output_utf8 || context->T.trans_from_uni)) { if (clong == 0xfffd && saved_char_in && HTPassEightBitRaw && - (unsigned char)saved_char_in >= LYlowest_eightbit[current_char_set]) + (unsigned char)saved_char_in >= + LYlowest_eightbit[context->html_char_set]) { HTChunkPutUtf8Char(string, (0xf000 | (unsigned char)saved_char_in)); - else + } else { HTChunkPutUtf8Char(string, clong); + } } else if (saved_char_in && context->T.use_raw_char_in) { HTChunkPutc(string, saved_char_in); #endif /* EXP_CHARTRANS */ @@ -2425,34 +2667,6 @@ top1: } break; } else if (tag_OK && - !strcasecomp(string->data, "P")) { - /* - ** Treat a P end tag like a P start tag (Ugh, - ** what a hack! 8-). - FM - */ - if (TRACE) - fprintf(stderr, - "SGML: `</%s%c' found! Treating as '<%s%c'.\n", - string->data, c, string->data, c); - { - int i; - for (i = 0; - i < context->current_tag->number_of_attributes; - i++) { - context->present[i] = NO; - } - } - string->size = 0; - context->current_attribute_number = INVALID; - if (context->current_tag->name) - start_element(context); - if (c != '>') { - context->state = S_junk_tag; - } else { - context->state = S_text; - } - break; - } else if (tag_OK && (!strcasecomp(string->data, "A") || !strcasecomp(string->data, "B") || !strcasecomp(string->data, "BLINK") || @@ -2461,6 +2675,7 @@ top1: !strcasecomp(string->data, "FONT") || !strcasecomp(string->data, "FORM") || !strcasecomp(string->data, "I") || + !strcasecomp(string->data, "P") || !strcasecomp(string->data, "STRONG") || !strcasecomp(string->data, "TT") || !strcasecomp(string->data, "U"))) { @@ -2471,12 +2686,69 @@ top1: ** with checks there to avoid throwing the HTML.c stack ** out of whack (Ugh, what a hack! 8-). - FM */ - if (TRACE) - fprintf(stderr, "SGML: End </%s>\n", string->data); - (*context->actions->end_element) - (context->target, - (context->current_tag - context->dtd->tags), - (char **)&context->include); + if (context->inSELECT) { + /* + ** We are in a SELECT block. - FM + */ + if (strcasecomp(string->data, "FORM")) { + /* + ** It is not at FORM end tag, so ignore it. - FM + */ + if (TRACE) { + fprintf(stderr, + "SGML: Ignoring end tag </%s> in SELECT block.\n", + string->data); + } + } else { + /* + ** End the SELECT block and then + ** handle the FORM end tag. - FM + */ + if (TRACE) { + fprintf(stderr, + "SGML: Faking SELECT end tag before </%s> end tag.\n", + string->data); + } + end_element(context, + SGMLFindTag(context->dtd, "SELECT")); + if (TRACE) { + fprintf(stderr, + "SGML: End </%s>\n", string->data); + } + (*context->actions->end_element) + (context->target, + (context->current_tag - context->dtd->tags), + (char **)&context->include); + } + } else if (!strcasecomp(string->data, "P")) { + /* + ** Treat a P end tag like a P start tag (Ugh, + ** what a hack! 8-). - FM + */ + if (TRACE) + fprintf(stderr, + "SGML: `</%s%c' found! Treating as '<%s%c'.\n", + string->data, c, string->data, c); + { + int i; + for (i = 0; + i < context->current_tag->number_of_attributes; + i++) { + context->present[i] = NO; + } + } + if (context->current_tag->name) + start_element(context); + } else { + if (TRACE) { + fprintf(stderr, + "SGML: End </%s>\n", string->data); + } + (*context->actions->end_element) + (context->target, + (context->current_tag - context->dtd->tags), + (char **)&context->include); + } string->size = 0; context->current_attribute_number = INVALID; if (c != '>') { @@ -2764,6 +3036,7 @@ PUBLIC HTStream* SGML_new ARGS3( context->unknown_tag = &HTTag_unrecognized; context->state = S_text; context->element_stack = 0; /* empty */ + context->inSELECT = FALSE; #ifdef CALLERDATA context->callerData = (void*) callerData; #endif /* CALLERDATA */ diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h index 36c17050..5b2d52ce 100644 --- a/WWW/Library/Implementation/UCAux.h +++ b/WWW/Library/Implementation/UCAux.h @@ -20,7 +20,9 @@ typedef enum { extern UCTQ_t UCCanUniTranslateFrom PARAMS((int from)); extern UCTQ_t UCCanTranslateUniTo PARAMS((int to)); extern UCTQ_t UCCanTranslateFromTo PARAMS((int from, int to)); -extern BOOL UCNeedNotTranslate PARAMS((int from, int to)); +extern BOOL UCNeedNotTranslate PARAMS(( + int from, + int to)); struct _UCTransParams { @@ -68,9 +70,17 @@ typedef void putc_func_t PARAMS(( HTStream * me, char ch)); +#ifndef UCMAP_H +#include "UCMap.h" +#endif /* UCMAP_H */ + extern BOOL UCPutUtf8_charstring PARAMS(( HTStream * target, putc_func_t * actions, - long code)); + UCode_t code)); +extern BOOL UCConvertUniToUtf8 PARAMS(( + UCode_t code, + char * buffer)); + #endif /* UCAUX_H */ diff --git a/WWW/Library/Implementation/UCMap.h b/WWW/Library/Implementation/UCMap.h index 017ebc92..e634f760 100644 --- a/WWW/Library/Implementation/UCMap.h +++ b/WWW/Library/Implementation/UCMap.h @@ -17,7 +17,10 @@ extern int UCTransChar PARAMS(( char ch_in, int charset_in, int charset_out)); -PUBLIC int UCReverseTransChar PARAMS((char ch_out, int charset_in, int charset_out)); +extern int UCReverseTransChar PARAMS(( + char ch_out, + int charset_in, + int charset_out)); extern int UCTransCharStr PARAMS(( char * outbuf, int buflen, |