diff options
Diffstat (limited to 'WWW/Library/Implementation/HTPlain.c')
-rw-r--r-- | WWW/Library/Implementation/HTPlain.c | 268 |
1 files changed, 150 insertions, 118 deletions
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index 78f027ea..cc121ce6 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -42,28 +42,40 @@ PUBLIC int HTPlain_lastraw = -1; struct _HTStream { CONST HTStreamClass * isa; HText * text; - LYUCcharset * UCI; /* pointer to node_anchor's UCInfo */ - int in_char_set; /* tells us what charset we are fed */ - int htext_char_set; /* what charset we feed to HText */ + /* + ** The node_anchor UCInfo and handle for the input (PARSER) stage. - FM + */ + LYUCcharset * inUCI; + int inUCLYhndl; + /* + ** The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM + */ + int outUCLYhndl; + /* + ** Counter, value, buffer and pointer for UTF-8 handling. - FM + */ char utf_count; UCode_t utf_char; - char utf_buf[8]; + char utf_buf[8]; char * utf_buf_p; + /* + ** The charset transformation structure. - FM + */ UCTransParams T; }; -PRIVATE char replace_buf [64]; /* buffer for replacement strings */ +PRIVATE char replace_buf [64]; /* buffer for replacement strings */ PRIVATE void HTPlain_getChartransInfo ARGS2( HTStream *, me, HTParentAnchor *, anchor) { - if (me->in_char_set < 0) { + if (me->inUCLYhndl < 0) { HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME, - UCT_SETBY_PARSER); - me->in_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); + UCT_SETBY_PARSER); + me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); } - if (me->htext_char_set < 0) { + if (me->outUCLYhndl < 0) { int chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); if (chndl < 0) { chndl = current_char_set; @@ -72,9 +84,9 @@ PRIVATE void HTPlain_getChartransInfo ARGS2( } HTAnchor_setUCInfoStage(anchor, chndl, UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); - me->htext_char_set = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); + me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); } - me->UCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); + me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); } /* Write the buffer out to the socket @@ -83,18 +95,13 @@ PRIVATE void HTPlain_getChartransInfo ARGS2( /*_________________________________________________________________________ ** -** A C T I O N R O U T I N E S +** A C T I O N R O U T I N E S */ - /* for forward reference to HTPlain_write - kw */ -#ifdef _WINDOWS -PRIVATE void HTPlain_write (HTStream * me, CONST char* s, int l); -#else PRIVATE void HTPlain_write PARAMS(( HTStream * me, CONST char * s, int l)); -#endif /* _WINDOWS */ /* Character handling ** ------------------ @@ -105,18 +112,18 @@ PRIVATE void HTPlain_put_character ARGS2( { #ifdef REMOVE_CR_ONLY /* - ** Throw away \r's. + ** Throw away \r's. */ if (c != '\r') { HText_appendCharacter(me->text, c); } #else /* - ** See HTPlain_write() for explanations of the following code - ** (we've been called via HTPlain_put_string() to do for each - ** character of a terminated string what HTPlain_write() does - ** via a while loop for each character in a stream of given - ** length). - FM + ** See HTPlain_write() for explanations of the following code + ** (we've been called via HTPlain_put_string() to do for each + ** character of a terminated string what HTPlain_write() does + ** via a while loop for each character in a stream of given + ** length). - FM */ if ((HTPlain_lastraw == '\r') && c == '\n') { HTPlain_lastraw = -1; @@ -134,12 +141,12 @@ PRIVATE void HTPlain_put_character ARGS2( */ HTPlain_write(me, &c, 1); } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 && - HTPassHighCtrlRaw) { + HTPassHighCtrlRaw) { HText_appendCharacter(me->text, c); } else if ((unsigned char)c == 160) { HText_appendCharacter(me->text, ' '); } else if ((unsigned char)c == 173) { - return; + return; } else if (((unsigned char)c >= 32 && (unsigned char)c < 127) || c == '\n' || c == '\t') { HText_appendCharacter(me->text, c); @@ -148,7 +155,7 @@ PRIVATE void HTPlain_put_character ARGS2( current_char_set != 0) { int len, high, low, i, diff = 1; CONST char * name; - int value = (int)((unsigned char)c - 160); + UCode_t value = (UCode_t)((unsigned char)c - 160); name = HTMLGetEntityName(value); len = strlen(name); @@ -160,7 +167,7 @@ PRIVATE void HTPlain_put_character ARGS2( diff = strncmp(HTML_dtd.entity_names[i], name, len); if (diff == 0) { HText_appendText(me->text, - LYCharSets[current_char_set][i]); + LYCharSets[current_char_set][i]); break; } } @@ -189,7 +196,7 @@ PRIVATE void HTPlain_put_string ARGS2(HTStream *, me, CONST char*, s) if (s == NULL) return; for (p = s; *p; p++) { - HTPlain_put_character(me, *p); + HTPlain_put_character(me, *p); } #endif /* REMOVE_CR_ONLY */ } @@ -203,14 +210,15 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) { CONST char * p; CONST char * e = s+l; + char c; + unsigned char c_unsign; BOOL chk; UCode_t code; - long uck; - char c_p; + long uck = 0; for (p = s; p < e; p++) { #ifdef REMOVE_CR_ONLY - /* + /* ** Append the whole string, but remove any \r's. - FM */ if (*p != '\r') { @@ -232,82 +240,108 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HText_appendCharacter(me->text, '\n'); continue; } - code = (unsigned char)*p; - c_p = *p; - + /* + ** Make sure the character is handled as Unicode + ** whenever that's appropriate. - FM + */ + c = *p; + c_unsign = (unsigned char)c; + code = (UCode_t)c_unsign; + /* + ** Combine any UTF-8 multibytes into Unicode + ** to check for special characters. - FM + */ if (me->T.decode_utf8) { /* - ** Combine UTF-8 into Unicode. - ** Incomplete characters silently ignored. - ** from Linux kernel's console.c + ** Combine UTF-8 into Unicode. + ** Incomplete characters silently ignored. + ** from Linux kernel's console.c - KW */ - if ((unsigned char)(*p) > 127) { - if (me->utf_count > 0 && (*p & 0xc0) == 0x80) { - me->utf_char = (me->utf_char << 6) | (*p & 0x3f); + if (c_unsign > 127) { + /* + ** We have an octet from a multibyte character. - FM + */ + if (me->utf_count > 0 && (c & 0xc0) == 0x80) { + /* + ** Adjust the UCode_t value, add the octet + ** to the buffer, and decrement the byte + ** count. - FM + */ + me->utf_char = (me->utf_char << 6) | (c & 0x3f); me->utf_count--; - *(me->utf_buf_p) = *p; + *(me->utf_buf_p) = c; (me->utf_buf_p)++; if (me->utf_count == 0) { - /* + /* ** Got a complete multibyte character. */ *(me->utf_buf_p) = '\0'; code = me->utf_char; if (code < 256) { - c_p = FROMASCII((char)code); + c = FROMASCII((char)code); } } else { - continue; /* iterate for more */ + /* + ** Get the next byte. - FM + */ + continue; } } else { /* - ** Accumulate a multibyte character. + ** Start handling a new multibyte character. - FM */ me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = *p; + me->utf_buf_p[0] = c; (me->utf_buf_p)++; if ((*p & 0xe0) == 0xc0) { me->utf_count = 1; - me->utf_char = (*p & 0x1f); + me->utf_char = (c & 0x1f); } else if ((*p & 0xf0) == 0xe0) { me->utf_count = 2; - me->utf_char = (*p & 0x0f); + me->utf_char = (c & 0x0f); } else if ((*p & 0xf8) == 0xf0) { me->utf_count = 3; - me->utf_char = (*p & 0x07); + me->utf_char = (c & 0x07); } else if ((*p & 0xfc) == 0xf8) { me->utf_count = 4; - me->utf_char = (*p & 0x03); + me->utf_char = (c & 0x03); } else if ((*p & 0xfe) == 0xfc) { me->utf_count = 5; - me->utf_char = (*p & 0x01); + me->utf_char = (c & 0x01); } else { /* - * Garbage. + * We got garbage, so ignore it. - FM */ me->utf_count = 0; me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; + me->utf_buf_p[0] = '\0'; } - continue; /* iterate for more */ + /* + ** Get the next byte. - FM + */ + continue; } } else { - /* + /* ** Got an ASCII character. */ me->utf_count = 0; + me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; } } - + if (me->T.trans_to_uni && (code >= 127 || - (code < 32 && code != 0 && me->T.trans_C0_to_uni))) { - code = UCTransToUni(c_p, me->in_char_set); + (code < 32 && code != 0 && + me->T.trans_C0_to_uni))) { + /* + ** Convert the octet to Unicode. - FM + */ + code = (UCode_t)UCTransToUni(c, me->inUCLYhndl); if (code > 0) { if (code < 256) { - c_p = FROMASCII((char)code); + c = FROMASCII((char)code); } } } @@ -323,14 +357,13 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** user should toggle off raw/CJK mode to reload. - FM */ if (HTCJK != NOCJK) { - HText_appendCharacter(me->text, c_p); + HText_appendCharacter(me->text, c); #define PASSHICTRL (me->T.transp || \ - code >= LYlowest_eightbit[me->in_char_set]) + code >= LYlowest_eightbit[me->inUCLYhndl]) #define PASS8859SPECL me->T.pass_160_173_raw #define PASSHI8BIT (HTPassEightBitRaw || \ (me->T.do_8bitraw && !me->T.trans_from_uni)) - /* ** If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the ** document matches and pass 127-160 8-bit characters. If it @@ -338,9 +371,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ } else if (code >= 127 && code < 161 && PASSHICTRL && PASS8859SPECL) { - HText_appendCharacter(me->text, *p); + HText_appendCharacter(me->text, c); } else if (code == 173 && PASS8859SPECL) { - HText_appendCharacter(me->text, *p); + HText_appendCharacter(me->text, c); /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and treat 160 (nbsp) as an ASCII space (32). - FM @@ -358,15 +391,15 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ } else if ((code >= 32 && code < 127) || (PASSHI8BIT && - c_p >= LYlowest_eightbit[me->htext_char_set]) || + c >= LYlowest_eightbit[me->outUCLYhndl]) || *p == '\n' || *p == '\t') { - HText_appendCharacter(me->text, c_p); + HText_appendCharacter(me->text, c); } else if (me->T.use_raw_char_in) { HText_appendCharacter(me->text, *p); -#ifdef NOTUSED_FOTEMODS +#ifdef NOTDEFINED /* - ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM + ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM */ } else if (code == 8194 || code == 8195 || code == 8201) { HText_appendCharacter(me->text, ' '); @@ -376,21 +409,21 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ } else if (code == 8211 || code == 8212) { HText_appendCharacter(me->text, '-'); -#endif /* NOTUSED_FOTEMODS */ +#endif /* NOTDEFINED */ /****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET - ******************************************************************/ + ******************************************************************/ } else if ((chk = (me->T.trans_from_uni && code >= 160)) && (uck = UCTransUniChar(code, - me->htext_char_set)) >= 32 && + me->outUCLYhndl)) >= 32 && uck < 256) { if (TRACE) { fprintf(stderr, "UCTransUniChar returned 0x%.2lX:'%c'.\n", uck, FROMASCII((char)uck)); } - HText_appendCharacter(me->text, (char)(uck & 0xff)); + HText_appendCharacter(me->text, ((char)(uck & 0xff))); } else if (chk && (uck == -4 || (me->T.repl_translated_C0 && uck > 0 && uck < 32)) && @@ -398,48 +431,48 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** Not found; look for replacement string. */ (uck = UCTransUniCharStr(replace_buf, 60, code, - me->htext_char_set, 0) >= 0)) { + me->outUCLYhndl, 0) >= 0)) { /* - ** No further tests for valididy - assume that whoever - ** defined replacement strings knew what she was doing. + ** No further tests for valididy - assume that whoever + ** defined replacement strings knew what she was doing. */ HText_appendText(me->text, replace_buf); /* ** If we get to here, and should have translated, - ** translation has failed so far. + ** translation has failed so far. */ } else if (chk && code > 127 && me->T.output_utf8) { /* - ** We want UTF-8 output, so do it now. - FM + ** We want UTF-8 output, so do it now. - FM */ if (*me->utf_buf) { HText_appendText(me->text, me->utf_buf); + me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; } else if (UCConvertUniToUtf8(code, replace_buf)) { HText_appendText(me->text, replace_buf); } else { sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } -#ifdef NOTUSED_FOTEMODS +#ifdef NOTDEFINED } else if (me->T.strip_raw_char_in && - (unsigned char)*p >= 0xc0 && + (unsigned char)*p >= 192 && (unsigned char)*p < 255) { /* - ** KOI special: strip high bit, gives - ** (somewhat) readable ASCII. + ** KOI special: strip high bit, gives + ** (somewhat) readable ASCII. */ HText_appendCharacter(me->text, (char)(*p & 0x7f)); /* - ** If we do not have the "7-bit approximations" as our - ** output character set (in which case we did it already) - ** seek a translation for that. Otherwise, or if the - ** translation fails, use UHHH notation. - FM + ** If we do not have the "7-bit approximations" as our + ** output character set (in which case we did it already) + ** seek a translation for that. Otherwise, or if the + ** translation fails, use UHHH notation. - FM */ } else if (chk && (chk = (!HTPassEightBitRaw && - (me->htext_char_set != + (me->outUCLYhndl != UCGetLYhndl_byMIME("us-ascii")))) && (uck = UCTransUniChar(code, UCGetLYhndl_byMIME("us-ascii"))) @@ -460,7 +493,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HText_appendText(me->text, replace_buf); } else if (code == 8204 || code == 8205) { /* - ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM + ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM */ if (TRACE) { fprintf(stderr, @@ -468,16 +501,16 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) } } else if (code == 8206 || code == 8207) { /* - ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM + ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM */ if (TRACE) { fprintf(stderr, "HTPlain_write: Ignoring '%ld'.\n", code); } -#endif /* NOTUSED_FOTEMODS */ +#endif /* NOTDEFINED */ } else if (me->T.trans_from_uni && code > 255) { if (PASSHI8BIT && PASSHICTRL && LYRawMode && - (unsigned char)*p >= LYlowest_eightbit[me->htext_char_set]) { + (unsigned char)*p >= LYlowest_eightbit[me->outUCLYhndl]) { HText_appendCharacter(me->text, *p); } else { sprintf(replace_buf, "U%.2lX", code); @@ -491,9 +524,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ } else if (code > 160) { if (!HTPassEightBitRaw && code <= 255 && - me->htext_char_set != 0) { + me->outUCLYhndl != 0) { /* - ** Attempt to translate. - FM + ** Out of luck, so use the UHHH notation (ugh). - FM */ int len, high, low, i, diff=1; CONST char * name; @@ -509,18 +542,18 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) diff = strncmp(HTML_dtd.entity_names[i], name, len); if (diff == 0) { HText_appendText(me->text, - LYCharSets[me->htext_char_set][i]); + LYCharSets[me->outUCLYhndl][i]); break; } } if (diff) { /* - ** Something went wrong in the translation, so - ** either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. + ** Something went wrong in the translation, so + ** either output as UTF8 or a hex representation or + ** pass the raw character and hope it's OK. */ if (!PASSHI8BIT) - c_p = FROMASCII((char)code); + c = FROMASCII((char)code); if (me->T.output_utf8 && *me->utf_buf) { HText_appendText(me->text, me->utf_buf); @@ -531,33 +564,33 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } else - HText_appendCharacter(me->text, c_p); + HText_appendCharacter(me->text, c); } } else { - /* + /* ** Didn't attempt a translation. - FM */ /* Either output as UTF8 or a hex representation or ** pass the raw character and hope it's OK. */ if (code <= 255 && !PASSHI8BIT) - c_p = FROMASCII((char)code); + c = FROMASCII((char)code); if (code > 127 && me->T.output_utf8 && *me->utf_buf) { HText_appendText(me->text, me->utf_buf); me->utf_buf_p = me->utf_buf; *(me->utf_buf_p) = '\0'; } else if (LYRawMode && - me->in_char_set != me->htext_char_set && + me->inUCLYhndl != me->outUCLYhndl && (PASSHI8BIT || PASSHICTRL) && - (unsigned char)c_p >= - LYlowest_eightbit[me->htext_char_set]) { - HText_appendCharacter(me->text, c_p); + (unsigned char)c >= + LYlowest_eightbit[me->outUCLYhndl]) { + HText_appendCharacter(me->text, c); } else if (me->T.trans_from_uni && code >= 127) { sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); } else - HText_appendCharacter(me->text, c_p); + HText_appendCharacter(me->text, c); } } #endif /* REMOVE_CR_ONLY */ @@ -589,25 +622,25 @@ PRIVATE void HTPlain_abort ARGS2( ** ----------------------- */ PUBLIC CONST HTStreamClass HTPlain = -{ +{ "PlainPresenter", HTPlain_free, HTPlain_abort, - HTPlain_put_character, HTPlain_put_string, HTPlain_write, -}; + HTPlain_put_character, HTPlain_put_string, HTPlain_write, +}; /* New object ** ---------- */ PUBLIC HTStream* HTPlainPresent ARGS3( HTPresentation *, pres, - HTParentAnchor *, anchor, + HTParentAnchor *, anchor, HTStream *, sink) { HTStream* me = (HTStream*)malloc(sizeof(*me)); if (me == NULL) - outofmem(__FILE__, "HTPlain_new"); + outofmem(__FILE__, "HTPlain_new"); me->isa = &HTPlain; HTPlain_lastraw = -1; @@ -616,18 +649,17 @@ PUBLIC HTStream* HTPlainPresent ARGS3( me->utf_char = 0; me->utf_buf[0] = me->utf_buf[6] =me->utf_buf[7] = '\0'; me->utf_buf_p = me->utf_buf; - me->htext_char_set = - HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT); - me->in_char_set = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_PARSER); + me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_HTEXT); + me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor,UCT_STAGE_PARSER); HTPlain_getChartransInfo(me, anchor); UCSetTransParams(&me->T, - me->in_char_set, me->UCI, - me->htext_char_set, + me->inUCLYhndl, me->inUCI, + me->outUCLYhndl, HTAnchor_getUCInfoStage(anchor,UCT_STAGE_HTEXT)); + me->text = HText_new(anchor); HText_setStyle(me->text, HTStyleNamed(styleSheet, "Example")); HText_beginAppend(me->text); return (HTStream*) me; } - |