diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-09-12 09:32:20 -0400 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-09-12 09:32:20 -0400 |
commit | 2f5222ea2a4d549b4f269c11d34016fc7dc58dde (patch) | |
tree | a71f2059819076b2adf4922b25d7cbe7d217c3fe /WWW | |
parent | 549ec595d1da7693d5f7730e63f539cc8452307f (diff) | |
download | lynx-snapshots-2f5222ea2a4d549b4f269c11d34016fc7dc58dde.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-64
Diffstat (limited to 'WWW')
-rw-r--r-- | WWW/Library/Implementation/CommonMakefile | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTAnchor.c | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTFile.c | 2 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTInit.h | 1 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMIME.c | 5 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.h | 5 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLGen.c | 223 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTPlain.c | 8 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 64 | ||||
-rw-r--r-- | WWW/Library/Implementation/UCAux.h | 2 | ||||
-rw-r--r-- | WWW/Library/Implementation/UCDefs.h | 4 | ||||
-rw-r--r-- | WWW/Library/Implementation/tcp.h | 4 |
13 files changed, 271 insertions, 59 deletions
diff --git a/WWW/Library/Implementation/CommonMakefile b/WWW/Library/Implementation/CommonMakefile index dea974b0..95a0c3a4 100644 --- a/WWW/Library/Implementation/CommonMakefile +++ b/WWW/Library/Implementation/CommonMakefile @@ -264,7 +264,7 @@ $(LOB)/HTRules.o : $(OE) $(CMN)HTRules.c $(CMN)HTUtils.h $(CMN)Version.make \ $(CMN)HTAAServ.h $(CMN)HTAAProt.h $(CC) -c -o $@ $(CFLAGS2) -DVC=\"$(VC)\" $(CMN)HTRules.c -$(LOB)/SGML.o : $(OE) $(CMN)SGML.c $(CMN)HTUtils.h +$(LOB)/SGML.o : $(OE) $(CMN)SGML.c $(CMN)HTUtils.h $(CMN)UCAux.h $(CC) -c -o $@ $(CFLAGS2) $(CMN)SGML.c $(LOB)/HTMLGen.o : $(OE) $(CMN)HTMLGen.c $(CMN)HTUtils.h $(CMN)HTMLDTD.h @@ -273,7 +273,7 @@ $(LOB)/HTMLGen.o : $(OE) $(CMN)HTMLGen.c $(CMN)HTUtils.h $(CMN)HTMLDTD.h $(LOB)/HTMLDTD.o : $(OE) $(CMN)HTMLDTD.c $(CMN)SGML.h $(CC) -c -o $@ $(CFLAGS2) $(CMN)HTMLDTD.c -$(LOB)/HTPlain.o : $(OE) $(CMN)HTPlain.c $(CMN)HTPlain.h $(CMN)HTStream.h +$(LOB)/HTPlain.o : $(OE) $(CMN)HTPlain.c $(CMN)HTPlain.h $(CMN)HTStream.h $(CMN)UCAux.h $(CC) -c -o $@ $(CFLAGS2) $(CMN)HTPlain.c $(LOB)/HTWAIS.o : $(OE) $(CMN)HTWAIS.c $(CMN)HTUtils.h $(CMN)HTList.h diff --git a/WWW/Library/Implementation/HTAnchor.c b/WWW/Library/Implementation/HTAnchor.c index 8c91f3fb..6db5f974 100644 --- a/WWW/Library/Implementation/HTAnchor.c +++ b/WWW/Library/Implementation/HTAnchor.c @@ -1313,8 +1313,8 @@ PUBLIC LYUCcharset * HTAnchor_copyUCInfoStage ARGS4( me->UCStages->s[to_stage].LYhndl = me->UCStages->s[from_stage].LYhndl; - memcpy(p_to, p_from, - sizeof(LYUCcharset)); + if (p_to != p_from) + memcpy(p_to, p_from, sizeof(LYUCcharset)); return p_to; } diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index 9e9a31a6..c9fdfe9b 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -865,7 +865,7 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); } - if (!(p_in->enc & UCT_ENC_CJK) && + if (p_in->enc != UCT_ENC_CJK && (p_in->codepoints & 
UCT_CP_SUBSETOF_LAT1)) { HTCJK = NOCJK; } else if (chndl == current_char_set) { diff --git a/WWW/Library/Implementation/HTInit.h b/WWW/Library/Implementation/HTInit.h index b11e7238..61c7d776 100644 --- a/WWW/Library/Implementation/HTInit.h +++ b/WWW/Library/Implementation/HTInit.h @@ -15,6 +15,7 @@ #endif /* HTUTILS_H */ extern void HTFormatInit NOPARAMS; +extern void HTPreparsedFormatInit NOPARAMS; extern void HTFileInit NOPARAMS; /* diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c index 3b887b8c..129cc846 100644 --- a/WWW/Library/Implementation/HTMIME.c +++ b/WWW/Library/Implementation/HTMIME.c @@ -442,7 +442,7 @@ PRIVATE void HTMIME_put_character ARGS2( UCT_STAGE_MIME), UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); } - if (!(p_in->enc & UCT_ENC_CJK) && + if (p_in->enc != UCT_ENC_CJK && (p_in->codepoints & UCT_CP_SUBSETOF_LAT1)){ HTCJK = NOCJK; } else if (chndl == current_char_set) { @@ -1610,7 +1610,8 @@ PRIVATE void HTMIME_put_character ARGS2( fprintf(stderr, "HTMIME: PICKED UP Content-Encoding: '%s'\n", me->value); - if (!(me->value && *me->value)) + if (!(me->value && *me->value) || + !strcasecomp(me->value, "identity")) break; /* ** Convert to lowercase and indicate in anchor. 
- FM diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index 0466e8ba..7b5c0279 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -1659,7 +1659,7 @@ static HTTag tags_new[HTML_ELEMENTS] = { { "H4" , h_attr, HTML_H_ATTRIBUTES, SGML_MIXED,T_H4}, { "H5" , h_attr, HTML_H_ATTRIBUTES, SGML_MIXED,T_H5}, { "H6" , h_attr, HTML_H_ATTRIBUTES, SGML_MIXED,T_H6}, - { "HEAD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_HEAD}, + { "HEAD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_ELEMENT,T_HEAD}, { "HR" , hr_attr, HTML_HR_ATTRIBUTES, SGML_EMPTY,T_HR}, { "HTML" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_HTML}, { "HY" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_HY}, @@ -1677,7 +1677,7 @@ static HTTag tags_new[HTML_ELEMENTS] = { { "LI" , list_attr, HTML_LI_ATTRIBUTES, SGML_MIXED,T_LI}, { "LINK" , link_attr, HTML_LINK_ATTRIBUTES, SGML_EMPTY,T_LINK}, { "LISTING" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL,T_LISTING}, - { "MAP" , map_attr, HTML_MAP_ATTRIBUTES, SGML_MIXED,T_MAP}, + { "MAP" , map_attr, HTML_MAP_ATTRIBUTES, SGML_ELEMENT,T_MAP}, { "MARQUEE" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_MARQUEE}, { "MATH" , math_attr, HTML_MATH_ATTRIBUTES, SGML_LITTERAL,T_MATH}, { "MENU" , ulist_attr, HTML_UL_ATTRIBUTES, SGML_MIXED,T_MENU}, diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h index 8c273d0d..f1688ffc 100644 --- a/WWW/Library/Implementation/HTMLDTD.h +++ b/WWW/Library/Implementation/HTMLDTD.h @@ -9,6 +9,11 @@ Tant pis. 
*/ + +#define HT_NON_BREAK_SPACE ((char)1) /* For now */ +#define HT_EM_SPACE ((char)2) +#define LY_SOFT_HYPHEN '\007' + #ifndef HTMLDTD_H #define HTMLDTD_H diff --git a/WWW/Library/Implementation/HTMLGen.c b/WWW/Library/Implementation/HTMLGen.c index 55679ef3..9a1efaf5 100644 --- a/WWW/Library/Implementation/HTMLGen.c +++ b/WWW/Library/Implementation/HTMLGen.c @@ -15,7 +15,8 @@ #include "HTUtils.h" #include "tcp.h" -#define BUFFER_SIZE 80 /* Line buffer attempts to make neat breaks */ +#define BUFFER_SIZE 200 /* Line buffer attempts to make neat breaks */ +#define MAX_CLEANNESS 20 /* Implements: */ @@ -50,16 +51,29 @@ struct _HTStructured { HTStreamClass targetClass; /* COPY for speed */ char buffer[BUFFER_SIZE+1]; /* 1for NL */ + int buffer_maxchars; char * write_pointer; - char * line_break; + char * line_break [MAX_CLEANNESS+1]; int cleanness; - BOOL delete_line_break_char; + BOOL overflowed; + BOOL delete_line_break_char[MAX_CLEANNESS+1]; BOOL preformatted; + BOOL escape_specials; + BOOL in_attrval; }; /* Flush Buffer ** ------------ */ + +PRIVATE void flush_breaks (HTStructured * me) +{ + int i; + for (i=0; i<= MAX_CLEANNESS; i++) { + me->line_break[i] = NULL; + } +} + PRIVATE void HTMLGen_flush ARGS1( HTStructured *, me) { @@ -67,9 +81,26 @@ PRIVATE void HTMLGen_flush ARGS1( me->buffer, me->write_pointer - me->buffer); me->write_pointer = me->buffer; - me->line_break = me->buffer; + flush_breaks(me); me->cleanness = 0; - me->delete_line_break_char = NO; + me->delete_line_break_char[0] = NO; +} + +/* Weighted optional line break +** +** We keep track of all the breaks for when we chop the line +*/ + +PRIVATE void allow_break (HTStructured * me, int new_cleanness, BOOL dlbc) +{ + if (dlbc && me->write_pointer == me->buffer) dlbc = NO; + me->line_break[new_cleanness] = + dlbc ? 
me->write_pointer - 1 /* Point to space */ + : me->write_pointer ; /* point to gap */ + me->delete_line_break_char[new_cleanness] = dlbc; + if (new_cleanness >= me->cleanness && + (me->overflowed || me->line_break[new_cleanness] > me->buffer)) + me->cleanness = new_cleanness; } /* Character handling @@ -83,12 +114,38 @@ PRIVATE void HTMLGen_flush ARGS1( ** file. We give extra "cleanness" to spaces appearing directly ** after periods (full stops), [semi]colons and commas. ** This should make the source files easier to read and modify -** by hand, too, though this is not a primary design consideration. +** by hand, too, though this is not a primary design consideration. TBL */ PRIVATE void HTMLGen_put_character ARGS2( HTStructured *, me, char, c) { + if (me->escape_specials && (unsigned char)c < 32) { + if (c == HT_NON_BREAK_SPACE || c == HT_EM_SPACE || + c == LY_SOFT_HYPHEN) { /* recursion... */ + HTMLGen_put_character(me, '&'); + HTMLGen_put_character(me, '#'); + HTMLGen_put_character(me, 'x'); + switch(c) { + case HT_NON_BREAK_SPACE: /* &#xA0; */ + HTMLGen_put_character(me, 'A'); + HTMLGen_put_character(me, '0'); + break; + case HT_EM_SPACE: /* &#x2003; */ + HTMLGen_put_character(me, '2'); + HTMLGen_put_character(me, '0'); + HTMLGen_put_character(me, '0'); + HTMLGen_put_character(me, '3'); + break; + case LY_SOFT_HYPHEN: /* &#xAD; */ + HTMLGen_put_character(me, 'A'); + HTMLGen_put_character(me, 'D'); + break; + } + c = ';'; + } + } + *me->write_pointer++ = c; if (c == '\n') { @@ -96,36 +153,37 @@ PRIVATE void HTMLGen_put_character ARGS2( return; } - if ((!me->preformatted && c == ' ')) { - int new_cleanness = 1; + /* Figure our whether we can break at this point + */ + if ((!me->preformatted && (c == ' ' || c == '\t'))) { + int new_cleanness = 3; if (me->write_pointer > (me->buffer + 1)) { char delims[5]; char * p; strcpy(delims, ",;:."); /* @@ english bias */ p = strchr(delims, me->write_pointer[-2]); - if (p) new_cleanness = p - delims + 2; - } - if (new_cleanness >= 
me->cleanness) { - me->line_break = me->write_pointer - 1; /* Point to space */ - me->cleanness = new_cleanness; - me->delete_line_break_char = YES; + if (p) new_cleanness = p - delims + 6; + if (!me->in_attrval) new_cleanness += 10; } + allow_break(me, new_cleanness, YES); } /* - * Flush buffer out when full. + * Flush buffer out when full, or whenever the line is over + * the nominal maximum and we can break at all */ - if (me->write_pointer == me->buffer + BUFFER_SIZE) { + if (me->write_pointer >= me->buffer + me->buffer_maxchars || + (me->overflowed && me->cleanness)) { if (me->cleanness) { - char line_break_char = me->line_break[0]; - char * saved = me->line_break; + char line_break_char = me->line_break[me->cleanness][0]; + char * saved = me->line_break[me->cleanness]; - if (me->delete_line_break_char) saved++; - me->line_break[0] = '\n'; + if (me->delete_line_break_char[me->cleanness]) saved++; + me->line_break[me->cleanness][0] = '\n'; (*me->targetClass.put_block)(me->target, me->buffer, - me->line_break - me->buffer + 1); - me->line_break[0] = line_break_char; + me->line_break[me->cleanness] - me->buffer + 1); + me->line_break[me->cleanness][0] = line_break_char; { /* move next line in */ char * p = saved; char *q; @@ -133,16 +191,36 @@ PRIVATE void HTMLGen_put_character ARGS2( *q++ = *p++; } me->cleanness = 0; - me->delete_line_break_char = 0; + /* Now we have to check whether ther are any perfectly good breaks + ** which weren't good enough for the last line but may be + ** good enough for the next + */ + { + int i; + for(i=0; i <= MAX_CLEANNESS; i++) { + if (me->line_break[i] != NULL && + me->line_break[i] > saved) { + me->line_break[i] = me->line_break[i] - + (saved-me->buffer); + me->cleanness = i; + } else { + me->line_break[i] = NULL; + } + } + } + + me->delete_line_break_char[0] = 0; me->write_pointer = me->write_pointer - (saved-me->buffer); + me->overflowed = NO; } else { (*me->targetClass.put_block)(me->target, me->buffer, - BUFFER_SIZE); + 
me->buffer_maxchars); me->write_pointer = me->buffer; + flush_breaks(me); + me->overflowed = YES; } - me->line_break = me->buffer; } } @@ -187,21 +265,47 @@ PRIVATE void HTMLGen_start_element ARGS5( BOOL was_preformatted = me->preformatted; HTTag * tag = &HTML_dtd.tags[element_number]; - me->preformatted = NO; /* free text within tags */ + me->preformatted = YES; /* free text within tags */ HTMLGen_put_character(me, '<'); HTMLGen_put_string(me, tag->name); if (present) { + BOOL had_attr = NO; for (i = 0; i < tag->number_of_attributes; i++) { if (present[i]) { + had_attr = YES; HTMLGen_put_character(me, ' '); + allow_break(me, 11, YES); HTMLGen_put_string(me, tag->attributes[i].name); if (value[i]) { - HTMLGen_put_string(me, "=\""); - HTMLGen_put_string(me, value[i]); - HTMLGen_put_character(me, '"'); + me->preformatted = was_preformatted; + me->in_attrval = YES; + if (strchr(value[i], '"') == NULL) { + HTMLGen_put_string(me, "=\""); + HTMLGen_put_string(me, value[i]); + HTMLGen_put_character(me, '"'); + } else if (strchr(value[i], '\'') == NULL) { + HTMLGen_put_string(me, "='"); + HTMLGen_put_string(me, value[i]); + HTMLGen_put_character(me, '\''); + } else { /* attribute value has both kinds of quotes */ + CONST char *p; + HTMLGen_put_string(me, "=\""); + for (p = value[i]; *p; p++) { + if (*p != '"') { + HTMLGen_put_character(me, *p); + } else { + HTMLGen_put_string(me, "&quot;"); + } + } + HTMLGen_put_character(me, '"'); + } + me->preformatted = YES; + me->in_attrval = NO; } } } + if (had_attr) + allow_break(me, 12, NO); } HTMLGen_put_string(me, ">"); /* got rid of \n LJM */ @@ -214,9 +318,10 @@ PRIVATE void HTMLGen_start_element ARGS5( * Can break after element start. */ if (!me->preformatted && tag->contents != SGML_EMPTY) { - me->line_break = me->write_pointer; /* Don't you hate SGML? 
*/ - me->cleanness = 1; - me->delete_line_break_char = NO; + if (HTML_dtd.tags[element_number].contents == SGML_ELEMENT) + allow_break(me, 15, NO); + else + allow_break(me, 2, NO); } } @@ -241,9 +346,10 @@ PRIVATE void HTMLGen_end_element ARGS3( /* * Can break before element end. */ - me->line_break = me->write_pointer; /* Don't you hate SGML? */ - me->cleanness = 1; - me->delete_line_break_char = NO; + if (HTML_dtd.tags[element_number].contents == SGML_ELEMENT) + allow_break(me, 14, NO); + else + allow_break(me, 1, NO); } HTMLGen_put_string(me, "</"); HTMLGen_put_string(me, HTML_dtd.tags[element_number].name); @@ -325,6 +431,10 @@ PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */ /* Subclass-specific Methods ** ------------------------- */ +extern int LYcols; /* LYCurses.h, set in LYMain.c */ +extern BOOL dump_output_immediately; /* TRUE if no interactive user */ +extern BOOLEAN LYPreparsedSource; /* Show source as preparsed? */ + PUBLIC HTStructured * HTMLGenerator ARGS1( HTStream *, output) { @@ -337,10 +447,41 @@ PUBLIC HTStructured * HTMLGenerator ARGS1( me->targetClass = *me->target->isa; /* Copy pointers to routines for speed*/ me->write_pointer = me->buffer; - me->line_break = me->buffer; + flush_breaks(me); + me->line_break[0] = me->buffer; me->cleanness = 0; - me->delete_line_break_char = NO; + me->overflowed = NO; + me->delete_line_break_char[0] = NO; me->preformatted = NO; + me->in_attrval = NO; + + /* + * For what line length should we attempt to wrap ? 
- kw + */ + if (!LYPreparsedSource) { + me->buffer_maxchars = 80; /* work as before - kw */ + } else if (dump_output_immediately) { + me->buffer_maxchars = 80; /* work as before - kw */ + } else { + me->buffer_maxchars = LYcols - 2; + if (me->buffer_maxchars < 38) /* too narrow, let GridText deal */ + me->buffer_maxchars = 40; + if (me->buffer_maxchars > 900) /* likely not true - kw */ + me->buffer_maxchars = 78; + if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */ + me->buffer_maxchars = BUFFER_SIZE - 2; + } + + /* + * If dump_output_immediately is set, there likely isn't anything + * after this stream to interpret the Lynx special chars. Also + * if they get displayed via HTPlain, that will probably make + * non-breaking space chars etc. invisible. So let's translate + * them to numerical character references. For debugging + * purposes we'll use the new hex format. + */ + me->escape_specials = LYPreparsedSource; + return me; } @@ -382,8 +523,16 @@ PUBLIC HTStream* HTPlainToHTML ARGS3( */ me->target = sink; me->targetClass = *me->target->isa; + me->write_pointer = me->buffer; + flush_breaks(me); + me->cleanness = 0; + me->overflowed = NO; + me->delete_line_break_char[0] = NO; + me->buffer_maxchars = 80; HTMLGen_put_string(me, "<HTML>\n<BODY>\n<PRE>\n"); me->preformatted = YES; + me->escape_specials = NO; + me->in_attrval = NO; return (HTStream*) me; } diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index 58f81e62..8c177625 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -299,7 +299,9 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) } } - if (me->T.trans_to_uni && unsign_c >= 127) { + if (me->T.trans_to_uni && (unsign_c >= 127 || + (unsign_c < 32 && unsign_c != 0 && + me->T.trans_C0_to_uni))) { unsign_c = UCTransToUni(c_p, me->in_char_set); if (unsign_c > 0) { if (unsign_c < 256) { @@ -395,7 +397,9 @@ PRIVATE void HTPlain_write 
ARGS3(HTStream *, me, CONST char*, s, int, l) uck, (char)uck); } HText_appendCharacter(me->text, (char)(uck & 0xff)); - } else if (chk && (uck == -4) && + } else if (chk && (uck == -4 || + (me->T.repl_translated_C0 && + uck > 0 && uck <32)) && /* ** Not found; look for replacement string. */ diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 9c607da3..5cd5d871 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -44,7 +44,7 @@ PUBLIC BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ PUBLIC BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173,&#127; raw. */ PUBLIC BOOL HTPassHighCtrlNum = FALSE; /* Pass &#128;-&#159; raw. */ -extern BOOLEAN LYCheckForCSI PARAMS((HTStructured *target, char **url)); +extern BOOLEAN LYCheckForCSI PARAMS((HTParentAnchor *anchor, char **url)); extern void LYDoCSI PARAMS((char *url, CONST char *comment, char **csi)); /* The State (context) of the parser @@ -457,7 +457,7 @@ PRIVATE void handle_comment ARGS1( if (context->csi == NULL && strncmp(s, "!--#", 4) == 0 && - LYCheckForCSI(context->target, (char **)&context->url) == TRUE) { + LYCheckForCSI(context->node_anchor, (char **)&context->url) == TRUE) { LYDoCSI(context->url, s, (char **)&context->csi); } @@ -1072,7 +1072,10 @@ PUBLIC void SGML_character ARGS2( if (context->T.strip_raw_char_in) saved_char_in = c; - if (context->T.trans_to_uni && unsign_c >= 127) { + if (context->T.trans_to_uni && + (unsign_c >= 127 || + (unsign_c < 32 && unsign_c != 0 && + context->T.trans_C0_to_uni))) { clong = UCTransToUni(c, context->in_char_set); if (clong > 0) { saved_char_in = c; @@ -1081,6 +1084,46 @@ PUBLIC void SGML_character ARGS2( } } goto top1; + } else if (unsign_c < 32 && unsign_c != 0 && + context->T.trans_C0_to_uni) { + /* This else if may be too ugly to keep... 
- kw */ + if (context->T.trans_from_uni && + (((clong = UCTransToUni(c, context->in_char_set)) >= 32) || + (context->T.transp && + (clong = UCTransToUni(c, context->in_char_set)) > 0))) { + saved_char_in = c; + if (clong < 256) { + c = (char)clong; + } + goto top1; + } else { + uck = -1; + if (context->T.transp) { + uck = UCTransCharStr(replace_buf, 60, c, + context->in_char_set, + context->in_char_set, NO); + } + if (!context->T.transp || uck < 0) { + uck = UCTransCharStr(replace_buf, 60, c, + context->in_char_set, + context->html_char_set, YES); + } + if (uck == 0) { + return; + } else if (uck < 0) { + goto top0a; + } + c = replace_buf[0]; + if (c && replace_buf[1]) { + if (context->state == S_text) { + for (p=replace_buf; *p; p++) + PUTC(*p); + return; + } + StrAllocCat(context->recover, replace_buf + 1); + } + goto top0a; + } /* next line end of ugly stuff for C0 - kw */ } else { goto top0a; } @@ -1221,7 +1264,9 @@ top1: } c = (char)(uck & 0xff); PUTC(c); - } else if (chk && (uck == -4) && + } else if (chk && ((uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck <32))) && /* ** Not found; look for replacement string. */ @@ -1443,13 +1488,16 @@ top1: if ((context->isHex ? sscanf(string->data, "%x", &value) : sscanf(string->data, "%d", &value)) == 1) { #ifdef EXP_CHARTRANS - if (value == 160) { + if (value == 160 || value == 173) { /* ** We *always* should interpret this as Latin1 here! ** Output the Lynx special character for nbsp and ** then recycle the terminator or break. - FM */ - PUTC(1); + if (value == 160) + PUTC(1); + else /* 173 */ + PUTC(7); string->size = 0; context->isHex = FALSE; context->state = S_text; @@ -1474,7 +1522,9 @@ top1: else { PUTC(FROMASCII((char)uck)); } - } else if ((uck == -4) && + } else if ((uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck <32)) && /* ** Not found; look for replacement string. 
*/ diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h index fd4b4618..ac7a5439 100644 --- a/WWW/Library/Implementation/UCAux.h +++ b/WWW/Library/Implementation/UCAux.h @@ -16,6 +16,8 @@ struct _UCTransParams BOOL pass_160_173_raw; BOOL do_8bitraw; BOOL trans_to_uni; + BOOL trans_C0_to_uni; + BOOL repl_translated_C0; BOOL trans_from_uni; }; typedef struct _UCTransParams UCTransParams; diff --git a/WWW/Library/Implementation/UCDefs.h b/WWW/Library/Implementation/UCDefs.h index a43088b7..53d56439 100644 --- a/WWW/Library/Implementation/UCDefs.h +++ b/WWW/Library/Implementation/UCDefs.h @@ -16,8 +16,8 @@ typedef struct _LYUCcharset { #define UCT_ENC_7BIT 0 #define UCT_ENC_8BIT 1 -#define UCT_ENC_8859 2 /* ??? */ -#define UCT_ENC_2022 3 +#define UCT_ENC_8859 2 +#define UCT_ENC_8BIT_C0 3 /* 8-bit some chars in C0 control area */ #define UCT_ENC_MAYBE2022 4 #define UCT_ENC_CJK 5 #define UCT_ENC_16BIT 6 diff --git a/WWW/Library/Implementation/tcp.h b/WWW/Library/Implementation/tcp.h index 0533c9c1..e94bc0eb 100644 --- a/WWW/Library/Implementation/tcp.h +++ b/WWW/Library/Implementation/tcp.h @@ -291,8 +291,8 @@ extern int bzero(); extern int multinet_connect(); extern int multinet_gethostname(); extern int multinet_getsockname(); -extern unsigned short multinet_htons(); -extern unsigned short multinet_ntohs(); +extern unsigned short multinet_htons(unsigned short __val); +extern unsigned short multinet_ntohs(unsigned short __val); extern int multinet_listen(); extern int multinet_select(); extern int multinet_socket(); |