diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-10 19:47:00 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1998-11-10 19:47:00 -0500 |
commit | d3f9d5478df478427c2aa5db4507ddd0a38f0eb6 (patch) | |
tree | e27eacd6bbda653dd77f11cc020b9e0a59f7f4fc /WWW/Library/Implementation/HTPlain.c | |
parent | 18024037b515bfff83e0230b35151babe6005e18 (diff) | |
download | lynx-snapshots-d3f9d5478df478427c2aa5db4507ddd0a38f0eb6.tar.gz |
snapshot of project "lynx", label v2-8-2dev_2
Diffstat (limited to 'WWW/Library/Implementation/HTPlain.c')
-rw-r--r-- | WWW/Library/Implementation/HTPlain.c | 252 |
1 files changed, 122 insertions, 130 deletions
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index 14b89bf8..81e46d72 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -7,8 +7,8 @@ ** Bugs: ** strings written must be less than buffer size. */ + #include <HTUtils.h> -#include <tcp.h> #include <HTPlain.h> @@ -31,9 +31,6 @@ #include <LYCharSets.h> #include <LYLeaks.h> -#define FREE(x) if (x) {free(x); x = NULL;} - -extern BOOLEAN LYRawMode; extern BOOL HTPassEightBitRaw; extern BOOL HTPassHighCtrlRaw; extern HTCJKlang HTCJK; @@ -54,6 +51,7 @@ struct _HTStream { /* ** The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM */ + LYUCcharset * outUCI; int outUCLYhndl; /* ** Counter, value, buffer and pointer for UTF-8 handling. - FM @@ -91,6 +89,7 @@ PRIVATE void HTPlain_getChartransInfo ARGS2( me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); } me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); + me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT); } /* Write the buffer out to the socket @@ -136,14 +135,14 @@ PRIVATE void HTPlain_put_character ARGS2( HTPlain_lastraw = c; if (c == '\r') { HText_appendCharacter(me->text, '\n'); - } else if (HTCJK != NOCJK) { - HText_appendCharacter(me->text, c); } else if ((unsigned char)c >= 127) { /* ** For now, don't repeat everything here ** that has been done below - KW */ HTPlain_write(me, &c, 1); + } else if (HTCJK != NOCJK) { + HText_appendCharacter(me->text, c); } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 && HTPassHighCtrlRaw) { HText_appendCharacter(me->text, c); @@ -156,9 +155,9 @@ PRIVATE void HTPlain_put_character ARGS2( HText_appendCharacter(me->text, c); } else if ((unsigned char)c > 160) { if (!HTPassEightBitRaw && - current_char_set != 0) { - size_t len, high, low, i; - int diff = 1; + !((me->outUCLYhndl == LATIN1) || + (me->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) { + int len, high, low, i, diff = 1; CONST char * name; UCode_t value = (UCode_t)((unsigned char)c - 160); @@ -172,7 +171,7 @@ PRIVATE void HTPlain_put_character ARGS2( diff = strncmp(HTML_dtd.entity_names[i], name, len); if (diff == 0) { HText_appendText(me->text, - LYCharSets[current_char_set][i]); + LYCharSets[me->outUCLYhndl][i]); break; } } @@ -218,8 +217,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) char c; unsigned char c_unsign; BOOL chk; - UCode_t code; - long uck = 0; + UCode_t code, uck = -1; + char saved_char_in = '\0'; for (p = s; p < e; p++) { #ifdef REMOVE_CR_ONLY @@ -252,6 +251,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) c = *p; c_unsign = (unsigned char)c; code = (UCode_t)c_unsign; + saved_char_in = '\0'; /* ** Combine any UTF-8 multibytes into Unicode ** to check for special characters. - FM @@ -282,8 +282,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ *(me->utf_buf_p) = '\0'; code = me->utf_char; - if (code < 256) { + if (code > 0 && code < 256) { c = FROMASCII((char)code); + c_unsign = (unsigned char)c; } } else { /* @@ -295,9 +296,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Start handling a new multibyte character. - FM */ - me->utf_buf_p = me->utf_buf; me->utf_buf_p[0] = c; - (me->utf_buf_p)++; + me->utf_buf_p = &me->utf_buf[1]; if ((*p & 0xe0) == 0xc0) { me->utf_count = 1; me->utf_char = (c & 0x1f); @@ -318,26 +318,45 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) * We got garbage, so ignore it. - FM */ me->utf_count = 0; - me->utf_buf_p = me->utf_buf; me->utf_buf_p[0] = '\0'; + me->utf_buf_p = me->utf_buf; } /* ** Get the next byte. - FM */ continue; } - } else { + } else if (me->utf_count > 0) { /* - ** Got an ASCII character. + ** Got an ASCII character when expecting + ** UTF-8 multibytes, so ignore the buffered + ** multibye characters and fall through with + ** the current ASCII character. - FM */ me->utf_count = 0; me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; + code = (UCode_t)c_unsign; + } else { + /* + ** Got a valid ASCII character, so fall + ** through with it. - FM + */ + code = (UCode_t)c_unsign; } } - + /* + ** Convert characters from non-UTF-8 charsets + ** to Unicode (if appropriate). - FM + */ + if (!(me->T.decode_utf8 && + (unsigned char)(*p) > 127)) { +#ifdef NOTDEFINED + if (me->T.strip_raw_char_in) + saved_char_in = c; +#endif /* NOTDEFINED */ if (me->T.trans_to_uni && - (code >= 127 || + (code >= LYlowest_eightbit[me->inUCLYhndl] || (code < 32 && code != 0 && me->T.trans_C0_to_uni))) { /* @@ -345,10 +364,58 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ code = (UCode_t)UCTransToUni(c, me->inUCLYhndl); if (code > 0) { + saved_char_in = c; if (code < 256) { - c = FROMASCII((char)code); + c = FROMASCII((char)code); + c_unsign = (unsigned char)c; } } + } else if (code < 32 && code != 0 && + me->T.trans_C0_to_uni) { + /* + ** Quote from SGML.c: + ** "This else if may be too ugly to keep. - KW" + */ + if (me->T.trans_from_uni && + (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) || + (me->T.transp && + (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) { + saved_char_in = c; + if (code < 256) { + c = FROMASCII((char)code); + c_unsign = (unsigned char)c; + } + } else { + uck = -1; + if (me->T.transp) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->inUCLYhndl, NO); + } + if (!me->T.transp || uck < 0) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->outUCLYhndl, YES); + } + if (uck == 0) { + continue; + } else if (uck < 0) { + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } else { + c = replace_buf[0]; + if (c && replace_buf[1]) { + HText_appendText(me->text, replace_buf); + continue; + } + } + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } /* Next line end of ugly stuff for C0. - KW */ + } else { + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } } /* ** At this point we have either code in Unicode @@ -399,17 +466,16 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) c >= LYlowest_eightbit[me->outUCLYhndl]) || *p == '\n' || *p == '\t') { HText_appendCharacter(me->text, c); - - } else if (me->T.use_raw_char_in) { - HText_appendCharacter(me->text, *p); -#ifdef NOTDEFINED /* ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM */ } else if (code == 8194 || code == 8195 || code == 8201) { HText_appendCharacter(me->text, ' '); -#endif /* NOTDEFINED */ - + /* + ** If we want the raw character, pass it now. - FM + */ + } else if (me->T.use_raw_char_in && saved_char_in) { + HText_appendCharacter(me->text, saved_char_in); /****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ @@ -417,11 +483,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) (uck = UCTransUniChar(code, me->outUCLYhndl)) >= 32 && uck < 256) { - if (TRACE) { - fprintf(stderr, - "UCTransUniChar returned 0x%.2lX:'%c'.\n", + CTRACE(tfp, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char)uck)); - } HText_appendCharacter(me->text, ((char)(uck & 0xff))); } else if (chk && (uck == -4 || @@ -463,23 +526,29 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** (somewhat) readable ASCII. */ HText_appendCharacter(me->text, (char)(*p & 0x7f)); +#endif /* NOTDEFINED */ + /* + ** If we don't actually want the character, + ** make it safe and output that now. - FM + */ + } else if ((c_unsign > 0 && + c_unsign < LYlowest_eightbit[me->outUCLYhndl]) || + (me->T.trans_from_uni && !HTPassEightBitRaw)) { /* ** If we do not have the "7-bit approximations" as our ** output character set (in which case we did it already) ** seek a translation for that. Otherwise, or if the ** translation fails, use UHHH notation. - FM */ - } else if (chk && - (chk = (!HTPassEightBitRaw && - (me->outUCLYhndl != - UCGetLYhndl_byMIME("us-ascii")))) && + if ((chk = (me->outUCLYhndl != + UCGetLYhndl_byMIME("us-ascii"))) && (uck = UCTransUniChar(code, UCGetLYhndl_byMIME("us-ascii"))) >= 32 && uck < 127) { /* ** Got an ASCII character (yippey). - FM */ - c = ((char)(uck & 0xff)); + c = FROMASCII((char)uck); HText_appendCharacter(me->text, c); } else if ((chk && uck == -4) && (uck = UCTransUniCharStr(replace_buf, @@ -489,110 +558,33 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Got a repacement string (yippey). - FM */ - HText_appendText(me->text, replace_buf); - } else if (code == 8204 || code == 8205) { - /* - ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM - */ - if (TRACE) { - fprintf(stderr, - "HTPlain_write: Ignoring '%ld'.\n", code); - } - } else if (code == 8206 || code == 8207) { - /* - ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM - */ - if (TRACE) { - fprintf(stderr, - "HTPlain_write: Ignoring '%ld'.\n", code); - } -#endif /* NOTDEFINED */ - } else if (me->T.trans_from_uni && code > 255) { - if (PASSHI8BIT && PASSHICTRL && LYRawMode && - (unsigned char)*p >= LYlowest_eightbit[me->outUCLYhndl]) { - HText_appendCharacter(me->text, *p); - } else { - sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); - } - /* - ** If we get to here and HTPassEightBitRaw or the - ** selected character set is not "ISO Latin 1", - ** use the translation tables for 161-255 8-bit - ** characters (173 was handled above). - FM - */ - } else if (code > 160) { - if (!HTPassEightBitRaw && code <= 255 && - me->outUCLYhndl != 0) { + } else if (code == 8204 || code == 8205) { + /* + ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM + */ + CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); + } else if (code == 8206 || code == 8207) { + /* + ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM + */ + CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); + } else { /* ** Out of luck, so use the UHHH notation (ugh). - FM */ - size_t len, high, low, i; - int diff = 1; - CONST char * name; - int value = (int)(code - 160); - - name = HTMLGetEntityName(value); - len = strlen(name); - for(low = 0, high = HTML_dtd.number_of_entities; - high > low; - diff < 0 ? (low = i+1) : (high = i)) { - /* Binary search */ - i = (low + (high-low)/2); - diff = strncmp(HTML_dtd.entity_names[i], name, len); - if (diff == 0) { - HText_appendText(me->text, - LYCharSets[me->outUCLYhndl][i]); - break; - } - } - if (diff) { - /* - ** Something went wrong in the translation, so - ** either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. - */ - if (!PASSHI8BIT) - c = FROMASCII((char)code); - if (me->T.output_utf8 && - *me->utf_buf) { - HText_appendText(me->text, me->utf_buf); - me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; - - } else if (me->T.trans_from_uni) { + /* do not print UHHH for now sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); - } else - HText_appendCharacter(me->text, c); + */ } - } else { /* - ** Didn't attempt a translation. - FM + ** If we get to here and have a monobyte character, + ** pass it. - FM */ - /* Either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. - */ - if (code <= 255 && !PASSHI8BIT) - c = FROMASCII((char)code); - if (code > 127 && me->T.output_utf8 && *me->utf_buf) { - HText_appendText(me->text, me->utf_buf); - me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; - - } else if (LYRawMode && - me->inUCLYhndl != me->outUCLYhndl && - (PASSHI8BIT || PASSHICTRL) && - (unsigned char)c >= - LYlowest_eightbit[me->outUCLYhndl]) { - HText_appendCharacter(me->text, c); - } else if (me->T.trans_from_uni && code >= 127) { - sprintf(replace_buf, "U%.2lX", code); - HText_appendText(me->text, replace_buf); - } else + } else if (c_unsign != 0 && c_unsign < 256) { HText_appendCharacter(me->text, c); } - } #endif /* REMOVE_CR_ONLY */ } } @@ -658,7 +650,7 @@ PUBLIC HTStream* HTPlainPresent ARGS3( HTAnchor_getUCInfoStage(anchor,UCT_STAGE_HTEXT)); me->text = HText_new(anchor); - HText_setStyle(me->text, styles[HTML_XMP] ); + HText_setStyle(me->text, LYstyles(HTML_XMP) ); HText_beginAppend(me->text); return (HTStream*) me; |