diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1998-05-24 00:20:00 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1998-05-24 00:20:00 -0400 |
commit | 6e75abc094af51d02563fd5250d1b55e9bb96912 (patch) | |
tree | 467ddc6c66cc643b960c48f70c93907c1bc775aa /WWW | |
parent | becd7d203aa4cc62c6e4113a496c88f6eab9edaf (diff) | |
download | lynx-snapshots-6e75abc094af51d02563fd5250d1b55e9bb96912.tar.gz |
snapshot of project "lynx", label v2-8-1dev_13
Diffstat (limited to 'WWW')
-rw-r--r-- | WWW/Library/Implementation/HTAAProt.c | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTPlain.c | 233 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 3 |
3 files changed, 122 insertions, 118 deletions
diff --git a/WWW/Library/Implementation/HTAAProt.c b/WWW/Library/Implementation/HTAAProt.c index 12c6361a..0c8dac86 100644 --- a/WWW/Library/Implementation/HTAAProt.c +++ b/WWW/Library/Implementation/HTAAProt.c @@ -179,7 +179,7 @@ PUBLIC int HTAA_getGid NOARGS CTRACE(tfp, "%s(%s) returned (%s:%s:%d:...)\n", "HTAA_getGid: getgrgid", current_prot->gid_name, - gr->gr_name, (int) gr->gr_passwd, (int) gr->gr_gid); + gr->gr_name, gr->gr_passwd, (int) gr->gr_gid); #endif return gr->gr_gid; } @@ -190,7 +190,7 @@ PUBLIC int HTAA_getGid NOARGS CTRACE(tfp, "%s(\"%s\") returned (%s:%s:%d:...)\n", "HTAA_getGid: getgrnam", current_prot->gid_name, - gr->gr_name, (int) gr->gr_passwd, (int) gr->gr_gid); + gr->gr_name, gr->gr_passwd, (int) gr->gr_gid); #endif return gr->gr_gid; } diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index df6cba9a..5caa4550 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -33,7 +33,6 @@ #define FREE(x) if (x) {free(x); x = NULL;} -extern BOOLEAN LYRawMode; extern BOOL HTPassEightBitRaw; extern BOOL HTPassHighCtrlRaw; extern HTCJKlang HTCJK; @@ -54,6 +53,7 @@ struct _HTStream { /* ** The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM */ + LYUCcharset * outUCI; int outUCLYhndl; /* ** Counter, value, buffer and pointer for UTF-8 handling. - FM @@ -91,6 +91,7 @@ PRIVATE void HTPlain_getChartransInfo ARGS2( me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); } me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); + me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT); } /* Write the buffer out to the socket @@ -136,14 +137,14 @@ PRIVATE void HTPlain_put_character ARGS2( HTPlain_lastraw = c; if (c == '\r') { HText_appendCharacter(me->text, '\n'); - } else if (HTCJK != NOCJK) { - HText_appendCharacter(me->text, c); } else if ((unsigned char)c >= 127) { /* ** For now, don't repeat everything here ** that has been done below - KW */ HTPlain_write(me, &c, 1); + } else if (HTCJK != NOCJK) { + HText_appendCharacter(me->text, c); } else if ((unsigned char)c >= 127 && (unsigned char)c < 161 && HTPassHighCtrlRaw) { HText_appendCharacter(me->text, c); @@ -156,9 +157,9 @@ PRIVATE void HTPlain_put_character ARGS2( HText_appendCharacter(me->text, c); } else if ((unsigned char)c > 160) { if (!HTPassEightBitRaw && - current_char_set != 0) { - size_t len, high, low, i; - int diff = 1; + !((me->outUCLYhndl == 0) || + (me->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) { + int len, high, low, i, diff = 1; CONST char * name; UCode_t value = (UCode_t)((unsigned char)c - 160); @@ -172,7 +173,7 @@ PRIVATE void HTPlain_put_character ARGS2( diff = strncmp(HTML_dtd.entity_names[i], name, len); if (diff == 0) { HText_appendText(me->text, - LYCharSets[current_char_set][i]); + LYCharSets[me->outUCLYhndl][i]); break; } } @@ -218,8 +219,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) char c; unsigned char c_unsign; BOOL chk; - UCode_t code; - long uck = 0; + UCode_t code, uck; + char saved_char_in = '\0'; for (p = s; p < e; p++) { #ifdef REMOVE_CR_ONLY @@ -252,6 +253,7 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) c = *p; c_unsign = (unsigned char)c; code = (UCode_t)c_unsign; + saved_char_in = '\0'; /* ** Combine any UTF-8 multibytes into Unicode ** to check for special characters. - FM @@ -282,8 +284,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ *(me->utf_buf_p) = '\0'; code = me->utf_char; - if (code < 256) { + if (code > 0 && code < 256) { c = FROMASCII((char)code); + c_unsign = (unsigned char)c; } } else { /* @@ -295,9 +298,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Start handling a new multibyte character. - FM */ - me->utf_buf_p = me->utf_buf; me->utf_buf_p[0] = c; - (me->utf_buf_p)++; + me->utf_buf_p = &me->utf_buf[1]; if ((*p & 0xe0) == 0xc0) { me->utf_count = 1; me->utf_char = (c & 0x1f); @@ -318,24 +320,43 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) * We got garbage, so ignore it. - FM */ me->utf_count = 0; - me->utf_buf_p = me->utf_buf; me->utf_buf_p[0] = '\0'; + me->utf_buf_p = me->utf_buf; } /* ** Get the next byte. - FM */ continue; } - } else { + } else if (me->utf_count > 0) { /* - ** Got an ASCII character. + ** Got an ASCII character when expecting + ** UTF-8 multibytes, so ignore the buffered + ** multibye characters and fall through with + ** the current ASCII character. - FM */ me->utf_count = 0; me->utf_buf[0] = '\0'; me->utf_buf_p = me->utf_buf; + code = (UCode_t)c_unsign; + } else { + /* + ** Got a valid ASCII character, so fall + ** through with it. - FM + */ + code = (UCode_t)c_unsign; } } - + /* + ** Convert characters from non-UTF-8 charsets + ** to Unicode (if appropriate). - FM + */ + if (!(me->T.decode_utf8 && + (unsigned char)(*p) > 127)) { +#ifdef NOTDEFINED + if (me->T.strip_raw_char_in) + saved_char_in = c; +#endif /* NOTDEFINED */ if (me->T.trans_to_uni && (code >= LYlowest_eightbit[me->inUCLYhndl] || (code < 32 && code != 0 && @@ -345,10 +366,58 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) */ code = (UCode_t)UCTransToUni(c, me->inUCLYhndl); if (code > 0) { + saved_char_in = c; if (code < 256) { - c = FROMASCII((char)code); + c = FROMASCII((char)code); + c_unsign = (unsigned char)c; } } + } else if (code < 32 && code != 0 && + me->T.trans_C0_to_uni) { + /* + ** Quote from SGML.c: + ** "This else if may be too ugly to keep. - KW" + */ + if (me->T.trans_from_uni && + (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) || + (me->T.transp && + (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) { + saved_char_in = c; + if (code < 256) { + c = FROMASCII((char)code); + c_unsign = (unsigned char)c; + } + } else { + uck = -1; + if (me->T.transp) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->inUCLYhndl, NO); + } + if (!me->T.transp || uck < 0) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->outUCLYhndl, YES); + } + if (uck == 0) { + continue; + } else if (uck < 0) { + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } else { + c = replace_buf[0]; + if (c && replace_buf[1]) { + HText_appendText(me->text, replace_buf); + continue; + } + } + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } /* Next line end of ugly stuff for C0. - KW */ + } else { + me->utf_buf[0] = '\0'; + code = (unsigned char)c; + } } /* ** At this point we have either code in Unicode @@ -399,17 +468,16 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) c >= LYlowest_eightbit[me->outUCLYhndl]) || *p == '\n' || *p == '\t') { HText_appendCharacter(me->text, c); - - } else if (me->T.use_raw_char_in) { - HText_appendCharacter(me->text, *p); -#ifdef NOTDEFINED /* ** Use an ASCII space (32) for ensp, emsp or thinsp. - FM */ } else if (code == 8194 || code == 8195 || code == 8201) { HText_appendCharacter(me->text, ' '); -#endif /* NOTDEFINED */ - + /* + ** If we want the raw character, pass it now. - FM + */ + } else if (me->T.use_raw_char_in && saved_char_in) { + HText_appendCharacter(me->text, saved_char_in); /****************************************************************** * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET ******************************************************************/ @@ -460,23 +528,29 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) ** (somewhat) readable ASCII. */ HText_appendCharacter(me->text, (char)(*p & 0x7f)); +#endif /* NOTDEFINED */ + /* + ** If we don't actually want the character, + ** make it safe and output that now. - FM + */ + } else if ((c_unsign > 0 && + c_unsign < LYlowest_eightbit[me->outUCLYhndl]) || + (me->T.trans_from_uni && !HTPassEightBitRaw)) { /* ** If we do not have the "7-bit approximations" as our ** output character set (in which case we did it already) ** seek a translation for that. Otherwise, or if the ** translation fails, use UHHH notation. - FM */ - } else if (chk && - (chk = (!HTPassEightBitRaw && - (me->outUCLYhndl != - UCGetLYhndl_byMIME("us-ascii")))) && + if ((chk = (me->outUCLYhndl != + UCGetLYhndl_byMIME("us-ascii"))) && (uck = UCTransUniChar(code, UCGetLYhndl_byMIME("us-ascii"))) >= 32 && uck < 127) { /* ** Got an ASCII character (yippey). - FM */ - c = ((char)(uck & 0xff)); + c = FROMASCII((char)uck); HText_appendCharacter(me->text, c); } else if ((chk && uck == -4) && (uck = UCTransUniCharStr(replace_buf, @@ -486,104 +560,33 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** Got a repacement string (yippey). - FM */ - HText_appendText(me->text, replace_buf); - } else if (code == 8204 || code == 8205) { - /* - ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM - */ - CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); - } else if (code == 8206 || code == 8207) { - /* - ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM - */ - CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); -#endif /* NOTDEFINED */ - } else if (me->T.trans_from_uni && code > 255) { - if (PASSHI8BIT && PASSHICTRL && LYRawMode && - (unsigned char)*p >= LYlowest_eightbit[me->outUCLYhndl]) { - HText_appendCharacter(me->text, *p); - } else { - sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); - } - /* - ** If we get to here and HTPassEightBitRaw or the - ** selected character set is not "ISO Latin 1", - ** use the translation tables for 161-255 8-bit - ** characters (173 was handled above). - FM - */ - } else if (code > 160) { - if (!HTPassEightBitRaw && code <= 255 && - me->outUCLYhndl != 0) { + } else if (code == 8204 || code == 8205) { + /* + ** Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM + */ + CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); + } else if (code == 8206 || code == 8207) { + /* + ** Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM + */ + CTRACE(tfp, "HTPlain_write: Ignoring '%ld'.\n", code); + } else { /* ** Out of luck, so use the UHHH notation (ugh). - FM */ - size_t len, high, low, i; - int diff = 1; - CONST char * name; - int value = (int)(code - 160); - - name = HTMLGetEntityName(value); - len = strlen(name); - for(low = 0, high = HTML_dtd.number_of_entities; - high > low; - diff < 0 ? (low = i+1) : (high = i)) { - /* Binary search */ - i = (low + (high-low)/2); - diff = strncmp(HTML_dtd.entity_names[i], name, len); - if (diff == 0) { - HText_appendText(me->text, - LYCharSets[me->outUCLYhndl][i]); - break; - } - } - if (diff) { - /* - ** Something went wrong in the translation, so - ** either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. - */ - if (!PASSHI8BIT) - c = FROMASCII((char)code); - if (me->T.output_utf8 && - *me->utf_buf) { - HText_appendText(me->text, me->utf_buf); - me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; - - } else if (me->T.trans_from_uni) { + /* do not print UHHH for now sprintf(replace_buf, "U%.2lX", code); HText_appendText(me->text, replace_buf); - } else - HText_appendCharacter(me->text, c); + */ } - } else { /* - ** Didn't attempt a translation. - FM + ** If we get to here and have a monobyte character, + ** pass it. - FM */ - /* Either output as UTF8 or a hex representation or - ** pass the raw character and hope it's OK. - */ - if (code <= 255 && !PASSHI8BIT) - c = FROMASCII((char)code); - if (code > 127 && me->T.output_utf8 && *me->utf_buf) { - HText_appendText(me->text, me->utf_buf); - me->utf_buf_p = me->utf_buf; - *(me->utf_buf_p) = '\0'; - - } else if (LYRawMode && - me->inUCLYhndl != me->outUCLYhndl && - (PASSHI8BIT || PASSHICTRL) && - (unsigned char)c >= - LYlowest_eightbit[me->outUCLYhndl]) { - HText_appendCharacter(me->text, c); - } else if (me->T.trans_from_uni && code >= 127) { - sprintf(replace_buf, "U%.2lX", code); - HText_appendText(me->text, replace_buf); - } else + } else if (c_unsign > 0 && c_unsign < 256) { HText_appendCharacter(me->text, c); } - } #endif /* REMOVE_CR_ONLY */ } } diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 32243519..d2a84397 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -329,7 +329,8 @@ PRIVATE void handle_attribute_value ARGS2( ** ** Additional issue, like handling bidirectional text if necessary ** may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) -** - currently they are passed to def7_uni.tbl as regular characters. +** - currently they are ignored in SGML.c and LYCharUtils.c +** but also in UCdomap.c because they are non printable... ** */ PRIVATE BOOL put_special_unicodes ARGS2( |