diff options
Diffstat (limited to 'WWW')
22 files changed, 853 insertions, 215 deletions
diff --git a/WWW/Library/Implementation/HTAnchor.c b/WWW/Library/Implementation/HTAnchor.c index e2dfa964..346f595c 100644 --- a/WWW/Library/Implementation/HTAnchor.c +++ b/WWW/Library/Implementation/HTAnchor.c @@ -441,7 +441,25 @@ PUBLIC HTAnchor * HTAnchor_findAddress ARGS1( return (HTAnchor *)foundAnchor; } } - +/* Create new or find old named anchor - simple form +** ------------------------------------------------- +** +** Like the previous one, but simpler to use for simple cases. +** No post data etc. can be supplied. - kw +*/ +PUBLIC HTAnchor * HTAnchor_findSimpleAddress ARGS1( + CONST char *, url) +{ + DocAddress urldoc; + + urldoc.address = (char *)url; /* ignore warning, it IS treated like const - kw */ + urldoc.post_data = NULL; + urldoc.post_content_type = NULL; + urldoc.bookmark = NULL; + urldoc.isHEAD = FALSE; + urldoc.safe = FALSE; + return HTAnchor_findAddress(&urldoc); +} /* Delete an anchor and possibly related things (auto garbage collection) ** -------------------------------------------- diff --git a/WWW/Library/Implementation/HTAnchor.h b/WWW/Library/Implementation/HTAnchor.h index ee0dcfef..de49d162 100644 --- a/WWW/Library/Implementation/HTAnchor.h +++ b/WWW/Library/Implementation/HTAnchor.h @@ -168,6 +168,15 @@ extern HTChildAnchor * HTAnchor_findChildAndLink PARAMS(( extern HTAnchor * HTAnchor_findAddress PARAMS(( CONST DocAddress * address)); +/* Create new or find old named anchor - simple form +** ------------------------------------------------- +** +** Like the previous one, but simpler to use for simple cases. +** No post data etc. can be supplied. 
- kw +*/ +extern HTAnchor * HTAnchor_findSimpleAddress PARAMS(( + CONST char * url)); + /* Delete an anchor and possibly related things (auto garbage collection) ** -------------------------------------------- ** diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c index 0149dfff..6b8c73f1 100644 --- a/WWW/Library/Implementation/HTChunk.c +++ b/WWW/Library/Implementation/HTChunk.c @@ -37,6 +37,8 @@ PUBLIC HTChunk * HTChunkCreate2 ARGS2 (int,grow, size_t, needed) if (needed > 0) { ch->allocated = needed-1 - ((needed-1) % ch->growby) + ch->growby; /* Round up */ + CTRACE((tfp, "HTChunkCreate2: requested %d, allocate %d\n", + needed, ch->allocated)); ch->data = (char *)calloc(1, ch->allocated); if (!ch->data) outofmem(__FILE__, "HTChunkCreate2 data"); diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c index 2c7593ce..d2a41a12 100644 --- a/WWW/Library/Implementation/HTFTP.c +++ b/WWW/Library/Implementation/HTFTP.c @@ -2757,7 +2757,7 @@ unload_btree: } if (entry_info->type) { - for (i = 0; entry_info->type[i] != '\0' && i < 15; i++) + for (i = 0; entry_info->type[i] != '\0' && i < 16; i++) PUTC(entry_info->type[i]); for (; i < 17; i++) PUTC(' '); @@ -3565,7 +3565,7 @@ listen: /* Reset buffering to control connection DD 921208 */ if (rv < 0) { -#if 0 +#if 0 /* any known servers where ABOR would work this way? 
*/ if (rv == HT_INTERRUPTED || rv == -501) if (send_cmd_nowait("ABOR") == 1) { outstanding++; @@ -3586,7 +3586,6 @@ listen: if (status < 0 && rv != HT_INTERRUPTED && rv != -1) { (void) HTInetStatus("close"); /* Comment only */ - data_soc = -1; /* invalidate it */ } else { if (rv != HT_LOADED && outstanding--) { status = response(NIL); /* Pick up final reply */ diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index 918c914e..cd8bcdab 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -607,7 +607,7 @@ PUBLIC char * HTURLPath_toFile ARGS2( StrAllocCopy(path, name); if (expand_all) - HTUnEscape(path); /* Interpret all % signs */ + HTUnEscape(path); /* Interpret all % signs */ else HTUnEscapeSome(path, "/"); /* Interpret % signs for path delims */ @@ -886,6 +886,10 @@ PUBLIC HTFormat HTFileFormat ARGS3( if (suff->rep) { if (pdesc && !(*pdesc)) *pdesc = suff->desc; + if (pencoding && IsUnityEnc(*pencoding) && + *pencoding != WWW_ENC_7BIT && + !IsUnityEnc(suff->encoding)) + *pencoding = suff->encoding; #ifdef VMS if (semicolon != NULL) *semicolon = ';'; @@ -1087,6 +1091,85 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( return format; } + + +/* Get various pieces of meta info from file name. +** ----------------------------------------------- +** +** LYGetFileInfo fills in information that can be determined without +** an actual (new) access to the filesystem, based on current suffix +** and character set configuration. If the file has been loaded and +** parsed before (with the same URL generated here!) and the anchor +** is still around, some results may be influenced by that (in +** particular, charset info from a META tag - this is not actually +** tested!). +** The caller should not keep pointers to the returned objects around +** for too long, the valid lifetimes vary. In particular, the returned +** charset string should be copied if necessary. 
If return of the +** file_anchor is requested, that one can be used to retrieve +** additional bits of info that are stored in the anchor object and +** are not covered here; as usual, don't keep pointers to the +** file_anchor longer than necessary since the object may disappear +** through HTuncache_current_document or at the next document load. +** - kw +*/ +PUBLIC void LYGetFileInfo ARGS7( + CONST char *, filename, + HTParentAnchor **, pfile_anchor, + HTFormat *, pformat, + HTAtom **, pencoding, + CONST char**, pdesc, + CONST char**, pcharset, + int *, pfile_cs) +{ + char *Afn; + char *Aname = NULL; + HTFormat format; + HTAtom * myEnc = NULL; + HTParentAnchor *file_anchor; + CONST char *file_csname = NULL; /* assigned below, once file_anchor has been looked up */ + int file_cs; + + /* + * Convert filename to URL. Note that it is always supposed to + * be a filename, not maybe-filename-maybe-URL, so we don't + * use LYFillLocalFileURL and LYEnsureAbsoluteURL. - kw + */ + Afn = HTEscape(filename, URL_PATH); + LYLocalFileToURL(&Aname, Afn); + file_anchor = HTAnchor_parent(HTAnchor_findSimpleAddress(Aname)); + + file_csname = file_anchor->charset; + format = HTFileFormat(filename, &myEnc, pdesc); + format = HTCharsetFormat(format, file_anchor, UCLYhndl_HTFile_for_unspec); + file_cs = HTAnchor_getUCLYhndl(file_anchor, UCT_STAGE_MIME); + if (!file_csname) { + if (file_cs >= 0) + file_csname = LYCharSet_UC[file_cs].MIMEname; + else file_csname = "display character set"; + } + CTRACE((tfp, "GetFileInfo: '%s' is a%s %s %s file, charset=%s (%d).\n", + filename, + ((myEnc && *HTAtom_name(myEnc) == '8') ? "n" : myEnc ? "" : + *HTAtom_name(format) == 'a' ? "n" : ""), + myEnc ?
HTAtom_name(myEnc) : "", + HTAtom_name(format), + file_csname, + file_cs)); + FREE(Afn); + FREE(Aname); + if (pfile_anchor) + *pfile_anchor = file_anchor; + if (pformat) + *pformat = format; + if (pencoding) + *pencoding = myEnc; + if (pcharset) + *pcharset = file_csname; + if (pfile_cs) + *pfile_cs = file_cs; + } + /* Determine value from file name. ** ------------------------------- ** diff --git a/WWW/Library/Implementation/HTFile.h b/WWW/Library/Implementation/HTFile.h index 9e03b790..d2207645 100644 --- a/WWW/Library/Implementation/HTFile.h +++ b/WWW/Library/Implementation/HTFile.h @@ -158,6 +158,35 @@ extern HTFormat HTCharsetFormat PARAMS(( HTParentAnchor * anchor, int default_LYhndl)); +/* Get various pieces of meta info from file name. +** ----------------------------------------------- +** +** LYGetFileInfo fills in information that can be determined without +** an actual (new) access to the filesystem, based on current suffix +** and character set configuration. If the file has been loaded and +** parsed before (with the same URL generated here!) and the anchor +** is still around, some results may be influenced by that (in +** particular, charset info from a META tag - this is not actually +** tested!). +** The caller should not keep pointers to the returned objects around +** for too long, the valid lifetimes vary. In particular, the returned +** charset string should be copied if necessary. If return of the +** file_anchor is requested, that one can be used to retrieve +** additional bits of info that are stored in the anchor object and +** are not covered here; as usual, don't keep pointers to the +** file_anchor longer than necessary since the object may disappear +** through HTuncache_current_document or at the next document load. 
+** - kw +*/ +extern void LYGetFileInfo PARAMS(( + CONST char * filename, + HTParentAnchor ** pfile_anchor, + HTFormat * pformat, + HTAtom ** pencoding, + CONST char** pdesc, + CONST char** pcharset, + int * pfile_cs)); + /* ** Determine file value from file name. */ diff --git a/WWW/Library/Implementation/HTFormat.c b/WWW/Library/Implementation/HTFormat.c index 5b521c87..868d5d51 100644 --- a/WWW/Library/Implementation/HTFormat.c +++ b/WWW/Library/Implementation/HTFormat.c @@ -275,6 +275,8 @@ PRIVATE int half_match ARGS2(char *,trial_type, char *,target) return 0; } +#define WWW_WILDCARD_REP_OUT HTAtom_for("*") + /* Look up a presentation ** ---------------------- ** @@ -291,7 +293,7 @@ PRIVATE HTPresentation * HTFindPresentation ARGS3( HTFormat, rep_out, HTPresentation*, fill_in) { - HTAtom * wildcard = HTAtom_for("*"); + HTAtom * wildcard = NULL; /* = HTAtom_for("*"); lookup when needed - kw */ CTRACE((tfp, "HTFormat: Looking up presentation for %s to %s\n", HTAtom_name(rep_in), HTAtom_name(rep_out))); @@ -318,12 +320,15 @@ PRIVATE HTPresentation * HTFindPresentation ARGS3( } else if (!fill_in) { continue; - } else if (pres->rep_out == wildcard) { - if (!strong_wildcard_match) - strong_wildcard_match = pres; - /* otherwise use the first one */ - CTRACE((tfp, "StreamStack: found strong wildcard match: %s\n", - HTAtom_name(pres->rep))); + } else { + if (!wildcard) wildcard = WWW_WILDCARD_REP_OUT; + if (pres->rep_out == wildcard) { + if (!strong_wildcard_match) + strong_wildcard_match = pres; + /* otherwise use the first one */ + CTRACE((tfp, "StreamStack: found strong wildcard match: %s\n", + HTAtom_name(pres->rep))); + } } } else if (!fill_in) { @@ -347,9 +352,10 @@ PRIVATE HTPresentation * HTFindPresentation ARGS3( /* otherwise use the first one */ CTRACE((tfp, "StreamStack: found weak wildcard match: %s\n", HTAtom_name(pres->rep_out))); - } - if (pres->rep_out == wildcard) { - if (!last_default_match) + + } else if (!last_default_match) { + if (!wildcard) 
wildcard = WWW_WILDCARD_REP_OUT; + if (pres->rep_out == wildcard) last_default_match = pres; /* otherwise use the first one */ } @@ -390,6 +396,7 @@ PUBLIC HTStream * HTStreamStack ARGS4( { HTPresentation temp; HTPresentation *match; + HTStream *result; CTRACE((tfp, "HTFormat: Constructing stream stack for %s to %s\n", HTAtom_name(rep_in), HTAtom_name(rep_out))); @@ -402,20 +409,31 @@ PUBLIC HTStream * HTStreamStack ARGS4( return sink; /* LJM */ #endif - if (rep_out == rep_in) - return sink; + if (rep_out == rep_in) { + result = sink; - if ((match = HTFindPresentation(rep_in, rep_out, &temp))) { + } else if ((match = HTFindPresentation(rep_in, rep_out, &temp))) { if (match == &temp) { CTRACE((tfp, "StreamStack: Using %s\n", HTAtom_name(temp.rep_out))); } else { CTRACE((tfp, "StreamStack: found exact match: %s\n", HTAtom_name(match->rep))); } - return (*match->converter)(match, anchor, sink); + result = (*match->converter)(match, anchor, sink); } else { - return NULL; + result = NULL; + } + if (TRACE) { + if (result && result->isa && result->isa->name) { + CTRACE((tfp, "StreamStack: Returning \"%s\"\n", result->isa->name)); + } else if (result) { + CTRACE((tfp, "StreamStack: Returning *unknown* stream!\n")); + } else { + CTRACE((tfp, "StreamStack: Returning NULL!\n")); + CTRACE_FLUSH(tfp); /* a crash may be imminent... 
- kw */ + } } + return result; } /* Put a presentation near start of list @@ -448,7 +466,7 @@ PUBLIC float HTStackValue ARGS4( float, initial_value, long int, length) { - HTAtom * wildcard = HTAtom_for("*"); + HTAtom * wildcard = WWW_WILDCARD_REP_OUT; CTRACE((tfp, "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n", HTAtom_name(rep_in), initial_value, HTAtom_name(rep_out))); diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index 66397291..7f8c37ed 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -721,7 +721,7 @@ static attr map_attr[] = { /* MAP attributes */ { "DIR" T(N) }, { "ID" T(i) }, { "LANG" T(N) }, - { "NAME" T(N) }, + { "NAME" T(i) }, { "STYLE" T(N) }, { "TITLE" T(N) }, { 0 T(N) } /* Terminate list */ @@ -1117,7 +1117,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* 1 2 3 4 5 6 7 8 */ /*345678901234567890123456789012345678901234567890123456789012345678901234567890 */ -/* self contain icont'n contn'd icont'd canclos omit */ +/* self contain icont'n contn'd icont'd canclos flags*/ /* { "A" , a_attr, HTML_A_ATTRIBUTES, SGML_MIXED }, */ #define T_A 0x0008, 0x0B007,0x0FF17,0x37787,0x77BA7,0x8604F,0x00014 /* { "ABBREV" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ @@ -1160,7 +1160,7 @@ static attr ulist_attr[] = { /* UL attributes */ #define T_BQ 0x0200, 0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 /* { "BR" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY }, */ #define T_BR 0x1000, 0x00000,0x00000,0x377BF,0x77FBF,0x8101F,0x00001 -#define T_BUTTON 0x0200, 0x0BB0B,0x0FF3B,0x0378F,0x37FAF,0x8035F,0x00000 +#define T_BUTTON 0x2000, 0x0BB07,0x0FF37,0x0378F,0x37FBF,0x8135F,0x00000 /* { "CAPTION" , caption_attr, HTML_CAPTION_ATTRIBUTES, SGML_MIXED }, */ #define T_CAPTION 0x0100, 0x0B04F,0x8FFFF,0x06A00,0xB6FA7,0x8035F,0x00000 /* { "CENTER" , div_attr, HTML_DIV_ATTRIBUTES, SGML_MIXED }, */ @@ -1180,7 +1180,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { 
"DD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY }, */ #define T_DD 0x0400, 0x0FBCF,0x8FFFF,0x00800,0xB6FFF,0x8071F,0x00001 /* { "DEL" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ -#define T_DEL 0x0002, 0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_DEL 0x0002, 0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 /* { "DFN" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ #define T_DFN 0x0002, 0x8B0CF,0x8FFFF,0x8778F,0xF7FBF,0x00003,0x00000 /* { "DIR" , ulist_attr, HTML_UL_ATTRIBUTES, SGML_MIXED }, */ @@ -1198,7 +1198,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "EMBED" , embed_attr, HTML_EMBED_ATTRIBUTES, SGML_EMPTY }, */ #define T_EMBED 0x2000, 0x8F107,0x8FFF7,0xB6FBF,0xB7FBF,0x1FF7F,0x00001 /* { "FIELDSET", fieldset_attr,HTML_FIELDSET_ATTRIBUTES, SGML_MIXED }, */ -#define T_FIELDSET 0x0200, 0x0FB42,0x0FF5F,0x07787,0x37FF7,0x8805F,0x00000 +#define T_FIELDSET 0x0200, 0x8FB4F,0x8FF7F,0x86787,0xB7FF7,0x8805F,0x00000 /* { "FIG" , fig_attr, HTML_FIG_ATTRIBUTES, SGML_MIXED }, */ #define T_FIG 0x0200, 0x0FB00,0x8FFFF,0x36680,0xB6FBF,0x8834F,0x00000 /* { "FN" , fn_attr, HTML_FN_ATTRIBUTES, SGML_MIXED }, */ @@ -1206,7 +1206,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "FONT" , font_attr, HTML_FONT_ATTRIBUTES, SGML_EMPTY }, */ #define T_FONT 0x0001, 0x8B04F,0x8FFFF,0xB778F,0xF7FBF,0x00001,0x00014 /* { "FORM" , form_attr, HTML_FORM_ATTRIBUTES, SGML_EMPTY }, */ -#define T_FORM 0x0080, 0x0FF6F,0x0FF7F,0x36E07,0x33F07,0x88DFF,0x00000 +#define T_FORM 0x0080, 0x0FF6F,0x0FF7F,0x36E07,0x32F07,0x88DFF,0x00000 /* { "FRAME" , frame_attr, HTML_FRAME_ATTRIBUTES, SGML_EMPTY }, */ #define T_FRAME 0x10000,0x00000,0x00000,0x10000,0x10000,0x9FFFF,0x00001 /* { "FRAMESET", frameset_attr,HTML_FRAMESET_ATTRIBUTES, SGML_MIXED }, */ @@ -1224,7 +1224,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "H6" , h_attr, HTML_H_ATTRIBUTES, SGML_MIXED }, */ #define T_H6 0x0100, 0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 /* { "HEAD" , gen_attr, 
HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ -#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FF7F,0x00006 +#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FF7F,0x00007 /* { "HR" , hr_attr, HTML_HR_ATTRIBUTES, SGML_EMPTY }, */ #define T_HR 0x4000, 0x00000,0x00000,0x3FE80,0x3FFBF,0x87F37,0x00001 /* { "HTML" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ @@ -1238,7 +1238,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "INPUT" , input_attr, HTML_INPUT_ATTRIBUTES, SGML_EMPTY }, */ #define T_INPUT 0x0040, 0x00000,0x00000,0x03F87,0x37F87,0x8904F,0x00001 /* { "INS" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ -#define T_INS 0x0002, 0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_INS 0x0002, 0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 /* { "ISINDEX" , isindex_attr, HTML_ISINDEX_ATTRIBUTES,SGML_EMPTY }, */ #define T_ISINDEX 0x8000, 0x00000,0x00000,0x7778F,0x7FFAF,0x80007,0x00001 /* { "KBD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ @@ -1246,8 +1246,8 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "KEYGEN" , keygen_attr, HTML_KEYGEN_ATTRIBUTES, SGML_EMPTY }, */ #define T_KEYGEN 0x0040, 0x00000,0x00000,0x07FB7,0x37FB7,0x80070,0x00001 /* { "LABEL" , label_attr, HTML_LABEL_ATTRIBUTES, SGML_MIXED }, */ -#define T_LABEL 0x0020, 0x9FFFF,0x9FFFF,0x9FFFF,0x9FFFF,0x00007,0x00000 -#define T_LEGEND 0x0002, 0x0B04F,0x0FF7F,0x00200,0x37FA7,0x00003,0x00000 +#define T_LABEL 0x0002, 0x0304F,0x0FFFF,0x0679F,0x36FBF,0x00007,0x00000 +#define T_LEGEND 0x0002, 0x0B04F,0x8FF7F,0x00200,0xB7FA7,0x00003,0x00000 /* { "LH" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY }, */ #define T_LH 0x0400, 0x0BB7F,0x8FFFF,0x00800,0x97FFF,0x8071F,0x00001 /* { "LI" , list_attr, HTML_LI_ATTRIBUTES, SGML_EMPTY }, */ @@ -1273,7 +1273,8 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "NOTE" , note_attr, HTML_NOTE_ATTRIBUTES, SGML_MIXED }, */ #define T_NOTE 0x0200, 0x0BBAF,0x8FFFF,0x376B0,0xB7FFF,0x8031F,0x00000 /* { "OBJECT" , object_attr, 
HTML_OBJECT_ATTRIBUTES, SGML_LITTERAL }, */ -#define T_OBJECT 0x2000, 0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83F5F,0x00000 +#define T_OBJECT 0x2000, 0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83F5F,0x00020 +#define T_OBJECT_PCDATA 0x2000, 0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83F5F,0x00008 /* { "OL" , olist_attr, HTML_OL_ATTRIBUTES, SGML_MIXED }, */ #define T_OL 0x0800, 0x0C400,0x8FFFF,0x37680,0xB7FB7,0x88F7F,0x00000 /* { "OPTION" , option_attr, HTML_OPTION_ATTRIBUTES, SGML_EMPTY }, */ @@ -1297,7 +1298,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "SCRIPT" , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_LITTERAL }, */ #define T_SCRIPT 0x2000, 0x00000,0x00000,0x77F9F,0x77FFF,0x87F5F,0x00000 /* { "SELECT" , select_attr, HTML_SELECT_ATTRIBUTES, SGML_MIXED }, */ -#define T_SELECT 0x0040, 0x08000,0x08000,0x03FAF,0x13FBF,0x80F5F,0x00008 +#define T_SELECT 0x0040, 0x08000,0x08000,0x03FAF,0x33FBF,0x80F5F,0x00008 #define T_SHY 0x1000, 0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 /* { "SMALL" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ #define T_SMALL 0x0001, 0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 @@ -1334,7 +1335,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "THEAD" , tr_attr, HTML_TR_ATTRIBUTES, SGML_EMPTY }, */ #define T_THEAD 0x0020, 0x00020,0x8FFFF,0x00800,0xB7FB7,0x8CF5F,0x00001 /* { "TITLE", gen_attr, HTML_GEN_ATTRIBUTES, SGML_RCDATA }, */ -#define T_TITLE 0x40000,0x00000,0x00000,0x50000,0x50000,0x0031F,0x00004 +#define T_TITLE 0x40000,0x00000,0x00000,0x50000,0x50000,0x0031F,0x0000C /* { "TR" , tr_attr, HTML_TR_ATTRIBUTES, SGML_EMPTY }, */ #define T_TR 0x0020, 0x00400,0x8FFFF,0x00820,0xB7FB7,0x8C75F,0x00001 /* { "TT" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ @@ -1357,10 +1358,10 @@ static attr ulist_attr[] = { /* UL attributes */ ** Must match definitions in HTMLDTD.html! ** Must be in alphabetical order. ** -** The T_* extra info is listed here, but it won't matter (is not used -** in SGML.c if Old_DTD is not set). 
This mainly simplifies comparison -** of the tags_old[] table (otherwise unchanged from original Lynx treatment) -** with the tags_new[] table below. - kw +** The T_* extra info is listed here, even though most fields are not used +** in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags). +** This simplifies comparison of the tags_old[] table (otherwise unchanged +** from original Lynx treatment) with the tags_new[] table below. - kw ** ** Name*, Attributes, No. of attributes, content, extra info... */ @@ -1577,7 +1578,7 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P("LISTING") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL,T_LISTING}, { P("MAP") , map_attr, HTML_MAP_ATTRIBUTES, SGML_ELEMENT,T_MAP}, { P("MARQUEE") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_MARQUEE}, - { P("MATH") , math_attr, HTML_MATH_ATTRIBUTES, SGML_LITTERAL,T_MATH}, + { P("MATH") , math_attr, HTML_MATH_ATTRIBUTES, SGML_PCDATA,T_MATH}, { P("MENU") , ulist_attr, HTML_UL_ATTRIBUTES, SGML_MIXED,T_MENU}, { P("META") , meta_attr, HTML_META_ATTRIBUTES, SGML_EMPTY,T_META}, { P("NEXTID") , nextid_attr, 1, SGML_EMPTY,T_NEXTID}, @@ -1594,7 +1595,7 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P("Q") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_Q}, { P("S") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_S}, { P0("SAMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SAMP}, - { P("SCRIPT") , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_LITTERAL,T_SCRIPT}, + { P("SCRIPT") , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_CDATA,T_SCRIPT}, { P("SELECT") , select_attr, HTML_SELECT_ATTRIBUTES, SGML_ELEMENT,T_SELECT}, { P("SHY") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_SHY}, { P("SMALL") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SMALL}, @@ -1602,14 +1603,14 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P("SPOT") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_EMPTY,T_SPOT}, { P("STRIKE") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_STRIKE}, { P("STRONG") , gen_attr, HTML_GEN_ATTRIBUTES, 
SGML_MIXED,T_STRONG}, - { P("STYLE") , style_attr, HTML_STYLE_ATTRIBUTES, SGML_LITTERAL,T_STYLE}, + { P("STYLE") , style_attr, HTML_STYLE_ATTRIBUTES, SGML_CDATA,T_STYLE}, { P("SUB") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SUB}, { P("SUP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED,T_SUP}, { P("TAB") , tab_attr, HTML_TAB_ATTRIBUTES, SGML_EMPTY,T_TAB}, { P("TABLE") , table_attr, HTML_TABLE_ATTRIBUTES, SGML_ELEMENT,T_TABLE}, { P("TBODY") , tr_attr, HTML_TR_ATTRIBUTES, SGML_ELEMENT,T_TBODY}, { P("TD") , td_attr, HTML_TD_ATTRIBUTES, SGML_MIXED,T_TD}, - { P("TEXTAREA"), textarea_attr,HTML_TEXTAREA_ATTRIBUTES, SGML_LITTERAL,T_TEXTAREA}, + { P("TEXTAREA"), textarea_attr,HTML_TEXTAREA_ATTRIBUTES, SGML_PCDATA,T_TEXTAREA}, { P("TEXTFLOW"), bodytext_attr,HTML_BODYTEXT_ATTRIBUTES, SGML_MIXED,T_TEXTFLOW}, { P("TFOOT") , tr_attr, HTML_TR_ATTRIBUTES, SGML_ELEMENT,T_TFOOT}, { P("TH") , td_attr, HTML_TD_ATTRIBUTES, SGML_MIXED,T_TH}, @@ -1624,6 +1625,11 @@ static CONST HTTag tags_new[HTML_ELEMENTS] = { { P0("XMP") , gen_attr, HTML_GEN_ATTRIBUTES, SGML_LITTERAL,T_XMP}, }; +/* This one will be used as a temporary substitute within the parser when + it has been signalled to parse OBJECT content (again) as MIXED. - kw */ +PUBLIC HTTag HTTag_mixedObject = + { P("OBJECT") , object_attr, HTML_OBJECT_ATTRIBUTES, SGML_MIXED,T_OBJECT_PCDATA}; + #undef P #undef P0 #undef P_ @@ -1665,6 +1671,7 @@ PUBLIC HTTag HTTag_unrecognized = { NULL_HTTag, NULL, 0, SGML_EMPTY,T__UNREC_}; + /* ** Utility Routine: Useful for people building HTML objects. 
*/ @@ -1705,6 +1712,37 @@ PUBLIC void HTStartAnchor ARGS3( (*obj->isa->start_element)(obj, HTML_A, present, value, -1, 0); } +/* Start an HTML_A element on obj: NAME is passed if name is non-empty, HREF and TYPE if non-NULL; tag_charset is forwarded to start_element. */ +PUBLIC void HTStartAnchor5 ARGS5( + HTStructured *, obj, + CONST char *, name, + CONST char *, href, + CONST char *, linktype, + int, tag_charset) +{ + BOOL present[HTML_A_ATTRIBUTES]; + CONST char * value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = NO; + + if (name && *name) { + present[HTML_A_NAME] = YES; + value[HTML_A_NAME] = name; + } + if (href) { + present[HTML_A_HREF] = YES; + value[HTML_A_HREF] = href; + } + if (linktype) { + present[HTML_A_TYPE] = YES; + value[HTML_A_TYPE] = linktype; + } + + (*obj->isa->start_element)(obj, HTML_A, present, value, tag_charset, 0); +} + PUBLIC void HTStartIsIndex ARGS3( HTStructured *, obj, CONST char *, prompt, diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h index 8b1cd4fb..9c81774a 100644 --- a/WWW/Library/Implementation/HTMLDTD.h +++ b/WWW/Library/Implementation/HTMLDTD.h @@ -28,6 +28,19 @@ #endif /* !LY_SOFT_HYPHEN */ /* +** Valid name chars for tag parsing. +*/ +#define IsNmStart(c) (isalpha((unsigned char)c)) +#define IsNmChar(c) (isalnum((unsigned char)c) || \ + c == '_' || c=='-' || c == '.'
|| c==':') + + +#define ReallyEmptyTagNum(e) ((HTML_dtd.tags[e].contents == SGML_EMPTY) && \ + !(HTML_dtd.tags[e].flags & Tgf_nreie)) +#define ReallyEmptyTag(t) ((t->contents == SGML_EMPTY) && \ + !(t->flags & Tgf_nreie)) + +/* Element Numbers @@ -972,6 +985,7 @@ extern void HTSwitchDTD PARAMS(( BOOL new)); extern HTTag HTTag_unrecognized; +extern HTTag HTTag_mixedObject; /* @@ -993,6 +1007,13 @@ extern void HTStartAnchor PARAMS(( CONST char * name, CONST char * href)); +extern void HTStartAnchor5 PARAMS(( + HTStructured * targetstream, + CONST char * name, + CONST char * href, + CONST char * linktype, + int tag_charset)); + /* Start IsIndex element - FM diff --git a/WWW/Library/Implementation/HTMLGen.c b/WWW/Library/Implementation/HTMLGen.c index 6af8a344..32e83dde 100644 --- a/WWW/Library/Implementation/HTMLGen.c +++ b/WWW/Library/Implementation/HTMLGen.c @@ -83,7 +83,7 @@ struct _HTStructured { */ PRIVATE void flush_breaks ARGS1( - HTStructured *, me) + HTStructured *, me) { int i; for (i=0; i<= MAX_CLEANNESS; i++) { @@ -92,7 +92,7 @@ PRIVATE void flush_breaks ARGS1( } PRIVATE void HTMLGen_flush ARGS1( - HTStructured *, me) + HTStructured *, me) { (*me->targetClass.put_block)(me->target, me->buffer, @@ -132,7 +132,7 @@ PRIVATE void HTMLGen_flush ARGS1( * -preparsed flag. - kw */ PRIVATE void do_cstyle_flush ARGS1( - HTStructured *, me) + HTStructured *, me) { if (!me->text && LYPreparsedSource) { me->text = HTMainText; @@ -177,7 +177,7 @@ PRIVATE void allow_break ARGS3( ** by hand, too, though this is not a primary design consideration. 
TBL */ PRIVATE void HTMLGen_put_character ARGS2( - HTStructured *, me, + HTStructured *, me, char, c) { if (me->escape_specials && (unsigned char)c < 32) { @@ -288,7 +288,7 @@ PRIVATE void HTMLGen_put_character ARGS2( ** --------------- */ PRIVATE void HTMLGen_put_string ARGS2( - HTStructured *, me, + HTStructured *, me, CONST char *, s) { CONST char * p; @@ -298,7 +298,7 @@ PRIVATE void HTMLGen_put_string ARGS2( } PRIVATE void HTMLGen_write ARGS3( - HTStructured *, me, + HTStructured *, me, CONST char *, s, int, l) { @@ -314,8 +314,8 @@ PRIVATE void HTMLGen_write ARGS3( ** Within the opening tag, there may be spaces ** and the line may be broken at these spaces. */ -PRIVATE void HTMLGen_start_element ARGS6( - HTStructured *, me, +PRIVATE int HTMLGen_start_element ARGS6( + HTStructured *, me, int, element_number, CONST BOOL*, present, CONST char **, value, @@ -487,10 +487,8 @@ PRIVATE void HTMLGen_start_element ARGS6( */ /* end really empty tags straight away */ -#define REALLY_EMPTY(e) ((HTML_dtd.tags[e].contents == SGML_EMPTY) && \ - !(HTML_dtd.tags[e].flags & Tgf_nreie)) - if (LYPreparsedSource && REALLY_EMPTY(element_number)) + if (LYPreparsedSource && ReallyEmptyTagNum(element_number)) { CTRACE((tfp, "STYLE:begin_element:ending EMPTY element style\n")); do_cstyle_flush(me); @@ -503,6 +501,24 @@ PRIVATE void HTMLGen_start_element ARGS6( Style_className, &hcode); } #endif /* USE_COLOR_STYLE */ + if (element_number == HTML_OBJECT && tag->contents == SGML_LITTERAL) { + /* + * These conditions only approximate the ones used in HTML.c. + * Let our SGML parser know that further content is to be parsed + * normally not literally. 
- kw + */ + if (!present) { + return HT_PARSER_OTHER_CONTENT; + } else if (!present[HTML_OBJECT_DECLARE] && + !(present[HTML_OBJECT_NAME] && + value[HTML_OBJECT_NAME] && *value[HTML_OBJECT_NAME])) { + if (present[HTML_OBJECT_SHAPES] || + !(present[HTML_OBJECT_USEMAP] && + value[HTML_OBJECT_USEMAP] && *value[HTML_OBJECT_USEMAP])) + return HT_PARSER_OTHER_CONTENT; + } + } + return HT_OK; } /* End Element @@ -516,8 +532,8 @@ PRIVATE void HTMLGen_start_element ARGS6( ** should be linked to the whole stack not just the top one.) ** TBL 921119 */ -PRIVATE void HTMLGen_end_element ARGS3( - HTStructured *, me, +PRIVATE int HTMLGen_end_element ARGS3( + HTStructured *, me, int, element_number, char **, insert GCC_UNUSED) { @@ -544,7 +560,7 @@ PRIVATE void HTMLGen_end_element ARGS3( TrimColorClass(HTML_dtd.tags[element_number].name, Style_className, &hcode); - if (LYPreparsedSource && !REALLY_EMPTY(element_number)) + if (LYPreparsedSource && !ReallyEmptyTagNum(element_number)) { CTRACE((tfp, "STYLE:end_element: ending non-EMPTY style\n")); do_cstyle_flush(me); @@ -555,6 +571,7 @@ PRIVATE void HTMLGen_end_element ARGS3( #endif /* USE_HASH */ } #endif /* USE_COLOR_STYLE */ + return HT_OK; } /* Expanding entities @@ -562,7 +579,7 @@ PRIVATE void HTMLGen_end_element ARGS3( ** */ PRIVATE int HTMLGen_put_entity ARGS2( - HTStructured *, me, + HTStructured *, me, int, entity_number) { int nent = HTML_dtd.number_of_entities; @@ -580,7 +597,7 @@ PRIVATE int HTMLGen_put_entity ARGS2( ** */ PRIVATE void HTMLGen_free ARGS1( - HTStructured *, me) + HTStructured *, me) { (*me->targetClass.put_character)(me->target, '\n'); HTMLGen_flush(me); @@ -592,14 +609,14 @@ PRIVATE void HTMLGen_free ARGS1( } PRIVATE void PlainToHTML_free ARGS1( - HTStructured *, me) + HTStructured *, me) { HTMLGen_end_element(me, HTML_PRE, 0); HTMLGen_free(me); } PRIVATE void HTMLGen_abort ARGS2( - HTStructured *, me, + HTStructured *, me, HTError, e GCC_UNUSED) { HTMLGen_free(me); @@ -609,7 +626,7 @@ PRIVATE void 
HTMLGen_abort ARGS2( } PRIVATE void PlainToHTML_abort ARGS2( - HTStructured *, me, + HTStructured *, me, HTError, e GCC_UNUSED) { PlainToHTML_free(me); @@ -633,7 +650,7 @@ PRIVATE CONST HTStructuredClass HTMLGeneration = /* As opposed to print etc */ */ extern int LYcols; /* LYCurses.h, set in LYMain.c */ extern BOOL dump_output_immediately; /* TRUE if no interactive user */ -extern int dump_output_width; /* -width instead of 80 */ +extern int dump_output_width; /* -width instead of 80 */ PUBLIC HTStructured * HTMLGenerator ARGS1( HTStream *, output) diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c index 2cd431e1..c98d8329 100644 --- a/WWW/Library/Implementation/HTNews.c +++ b/WWW/Library/Implementation/HTNews.c @@ -2134,11 +2134,12 @@ PRIVATE int HTLoadNews ARGS4( HTFormat, format_out, HTStream*, stream) { - char command[260]; /* The whole command */ + char command[262]; /* The whole command */ char proxycmd[260]; /* The proxy command */ char groupName[GROUP_NAME_LENGTH]; /* Just the group name */ int status; /* tcp return */ int retries; /* A count of how hard we have tried */ + BOOL normal_url; /* Flag: "news:" or "nntp:" (physical) URL */ BOOL group_wanted; /* Flag: group was asked for, not article */ BOOL list_wanted; /* Flag: list was asked for, not article */ BOOL post_wanted; /* Flag: new post to group was asked for */ @@ -2183,12 +2184,13 @@ PRIVATE int HTLoadNews ARGS4( ** xxxxx News group (no "@") ** group/n1-n2 Articles n1 to n2 in group */ - spost_wanted = (BOOL) (strstr(arg, "snewspost:") != NULL); - sreply_wanted = (BOOL) (!(spost_wanted) && + normal_url = (!strncmp(arg, "news:", 5) || !strncmp(arg, "nntp:", 5)); + spost_wanted = (BOOL) (!normal_url && strstr(arg, "snewspost:") != NULL); + sreply_wanted = (BOOL) (!(normal_url || spost_wanted) && strstr(arg, "snewsreply:") != NULL); - post_wanted = (BOOL) (!(spost_wanted || sreply_wanted) && + post_wanted = (BOOL) (!(normal_url || spost_wanted || sreply_wanted) && 
strstr(arg, "newspost:") != NULL); - reply_wanted = (BOOL) (!(spost_wanted || sreply_wanted || + reply_wanted = (BOOL) (!(normal_url || spost_wanted || sreply_wanted || post_wanted) && strstr(arg, "newsreply:") != NULL); group_wanted = (BOOL) ((!(spost_wanted || sreply_wanted || @@ -2377,7 +2379,7 @@ PRIVATE int HTLoadNews ARGS4( } } else { /* - ** Reset p1 so that it points to the newgroup + ** Reset p1 so that it points to the newsgroup ** (or a wildcard), or the article. */ if (!(cp = strrchr((p1 + 6), '/')) || *(cp + 1) == '\0') { @@ -2396,6 +2398,12 @@ PRIVATE int HTLoadNews ARGS4( if (post_wanted || reply_wanted || spost_wanted || sreply_wanted) { strcpy(command, "POST"); } else if (list_wanted) { + if (strlen(p1) > 249) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } SnipIn(command, "XGTITLE %.*s", 11, p1); } else if (group_wanted) { char * slash = strchr(p1, '/'); @@ -2403,7 +2411,13 @@ PRIVATE int HTLoadNews ARGS4( last = 0; if (slash) { *slash = '\0'; - LYstrncpy(groupName, p1, sizeof(groupName)-1); + if (strlen(p1) >= sizeof(groupName)) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + strcpy(groupName, p1); *slash = '/'; (void)sscanf(slash+1, "%d-%d", &first, &last); if ((first > 0) && (isdigit(*(slash+1))) && @@ -2419,17 +2433,29 @@ PRIVATE int HTLoadNews ARGS4( last = -1; } } else { - LYstrncpy(groupName, p1, sizeof(groupName)-1); + if (strlen(p1) >= sizeof(groupName)) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + strcpy(groupName, p1); } SnipIn(command, "GROUP %.*s", 9, groupName); } else { - char *left = (strrchr(p1, '<') == 0) ? "<" : ""; - char *right = (strrchr(p1, '>') == 0) ? 
">" : ""; + int add_open=(strchr(p1, '<') == 0); + int add_close=(strchr(p1, '>') == 0); + if (strlen(p1) + add_open + add_close >= 252) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } sprintf(command, "ARTICLE %s%.*s%s", - left, - (int) (sizeof(command) - (11 + strlen(left) + strlen(right))), + add_open ? "<" : "", + (int) (sizeof(command) - (11 + add_open + add_close)), p1, - right); + add_close ? ">" : ""); } { @@ -2455,7 +2481,7 @@ PRIVATE int HTLoadNews ARGS4( if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted || (group_wanted && last != -1) || list_wanted)) { head_wanted = anAnchor->isHEAD; - if (head_wanted && !strncmp(command, "ARTICLE_", 8)) { + if (head_wanted && !strncmp(command, "ARTICLE ", 8)) { /* overwrite "ARTICLE" - hack... */ strcpy(command, "HEAD "); for (cp = command + 5; ; cp++) @@ -2581,13 +2607,23 @@ PRIVATE int HTLoadNews ARGS4( } if (retries < 1) continue; + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + FREE(postfile); if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted)) { ABORT_TARGET; } - HTSprintf0(&dbuf, + if (response_text[0]) { + HTSprintf0(&dbuf, gettext("Can't read news info. News host %.20s responded: %.200s"), NewsHost, response_text); + } else { + HTSprintf0(&dbuf, + gettext("Can't read news info, empty response from host %s"), + NewsHost); + } return HTLoadError(stream, 500, dbuf); } if (status == 200) { diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index 74197460..32ae6fa5 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -320,9 +320,10 @@ PUBLIC char * HTParse ARGS3( if (*h == '.') *h = '\0'; /* chop final . */ } - } else { + } else if (p2 != result) { h = p2; - h--; /* End of hostname */ + if (h != result) + h--; /* End of hostname */ if (*h == '.') { /* ** Slide p2 over h. 
diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c index c82e6365..968ded4c 100644 --- a/WWW/Library/Implementation/HTPlain.c +++ b/WWW/Library/Implementation/HTPlain.c @@ -35,6 +35,7 @@ extern BOOL HTPassHighCtrlRaw; extern HTCJKlang HTCJK; PUBLIC int HTPlain_lastraw = -1; +PRIVATE int HTPlain_bs_pending = 0; /* 1:bs 2:underline 3:underline+bs - kw */ /* HTML Object ** ----------- @@ -45,8 +46,8 @@ struct _HTStream { /* ** The node_anchor UCInfo and handle for the input (PARSER) stage. - FM */ - LYUCcharset * inUCI; - int inUCLYhndl; + LYUCcharset * inUCI; + int inUCLYhndl; /* ** The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM */ @@ -131,7 +132,11 @@ PRIVATE void HTPlain_put_character ARGS2( HTPlain_lastraw = -1; return; } - HTPlain_lastraw = c; + if (c == '\b' || c == '_' || HTPlain_bs_pending) { + HTPlain_write(me, &c, 1); + return; + } + HTPlain_lastraw = (unsigned char)c; if (c == '\r') { HText_appendCharacter(me->text, '\n'); } else if (TOASCII((unsigned char)c) >= 127) { /* S/390 -- gil -- 0305 */ @@ -228,6 +233,34 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HText_appendCharacter(me->text, *p); } #else + if (*p == '\b') { + if (HTPlain_lastraw >= (unsigned char)' ' && + HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') { + if (!HTPlain_bs_pending) { + HTPlain_bs_pending = 1; + continue; + } else if (HTPlain_bs_pending == 2) { + HTPlain_bs_pending = 3; + continue; + } + } + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); + HTPlain_bs_pending = 0; + } else if (*p == '_') { + if (!HTPlain_bs_pending) { + HTPlain_bs_pending = 2; + HTPlain_lastraw = (unsigned char)*p; + continue; +#if 0 + } else if (HTPlain_bs_pending != 2) { + HTPlain_bs_pending--; /* 1 -> 0, 3 -> 2 */ + HTPlain_lastraw = (unsigned char)*p; + continue; +#endif + } + } + /* ** Try to handle lone LFs, CRLFs and lone CRs ** as newline, and to deal with control, ASCII, @@ -238,7 +271,35 
@@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) HTPlain_lastraw = -1; continue; } - HTPlain_lastraw = *p; + + if (HTPlain_bs_pending && + !((unsigned char)*p >= ' ' && *p != '\r' && *p != '\n' && + (HTPlain_lastraw == (unsigned char)*p || + HTPlain_lastraw == (unsigned char)'_' || + *p == '_'))) { + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); + HTPlain_bs_pending = 0; + } else if (HTPlain_bs_pending == 1) { + HTPlain_bs_pending = 0; + continue; /* ignore last two of "X\bX" or "X\b_" - kw */ + } else if (HTPlain_bs_pending == 3) { + if (*p == '_') { + HTPlain_bs_pending = 2; + continue; /* ignore last two of "_\b_" - kw */ + } else { + HTPlain_bs_pending = 0; + /* ignore first two of "_\bX" - kw */ + } + } else if (HTPlain_bs_pending == 2) { + HText_appendCharacter(me->text, '_'); + if (*p == '_') + continue; /* keep second of "__" pending - kw */ + HTPlain_bs_pending = 0; + } else { + HTPlain_bs_pending = 0; + } + HTPlain_lastraw = (unsigned char)*p; if (*p == '\r') { HText_appendCharacter(me->text, '\n'); continue; @@ -454,8 +515,15 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) /* ** If neither HTPassHighCtrlRaw nor CJK is set, play it safe ** and ignore 173 (shy). - FM + ** Now only ignore it for color style, which doesn't handle it anyway. + ** Otherwise pass it on as LY_SOFT_HYPHEN and let HText deal with it. + ** It should be either ignored, or displayed as a hyphen if it was + ** indeed at the end of a line. Well it should. - kw */ } else if (code == CH_SHY) { +#ifndef USE_COLOR_STYLE + HText_appendCharacter(me->text, LY_SOFT_HYPHEN); +#endif continue; /* ** If we get to here, pass the displayable ASCII characters. 
- FM @@ -600,6 +668,8 @@ PRIVATE void HTPlain_write ARGS3(HTStream *, me, CONST char*, s, int, l) PRIVATE void HTPlain_free ARGS1( HTStream *, me) { + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); FREE(me); } diff --git a/WWW/Library/Implementation/HTString.c b/WWW/Library/Implementation/HTString.c index 03a73cfd..824ab937 100644 --- a/WWW/Library/Implementation/HTString.c +++ b/WWW/Library/Implementation/HTString.c @@ -151,7 +151,7 @@ PUBLIC int strncasecomp ARGS3( return ((long)n < 0 ? 0 : cm[*us1] - cm[*--us2]); } -#else /* SH_EX */ +#else /* SH_EX */ /* Strings of any length ** --------------------- @@ -355,7 +355,7 @@ PUBLIC char * HTNextField ARGS1( ** On entry, ** *pstr points to a string to be parsed. ** delims lists characters to be recognized as delimiters. -** If NULL default is white white space "," ";" or "=". +** If NULL, default is white space "," ";" or "=". ** The word can optionally be quoted or enclosed with ** chars from bracks. ** Comments surrrounded by '(' ')' are filtered out @@ -363,7 +363,7 @@ PUBLIC char * HTNextField ARGS1( ** ' ' or '(' in delims or bracks. ** bracks lists bracketing chars. Some are recognized as ** special, for those give the opening char. -** If NULL defaults to <"> and "<" ">". +** If NULL, defaults to <"> and "<" ">". ** found points to location to fill with the ending delimiter ** found, or is NULL. 
** @@ -378,7 +378,7 @@ PUBLIC char * HTNextTok ARGS4( char **, pstr, CONST char *, delims, CONST char *, bracks, - char *, found) + char *, found) { char * p = *pstr; char * start = NULL; @@ -432,7 +432,7 @@ PUBLIC char * HTNextTok ARGS4( } else get_closing_char_too = (BOOL) (strchr(bracks,*p) != NULL); } - } else if (strchr(bracks,*p)) { /* quoted or bracketted field */ + } else if (strchr(bracks,*p)) { /* quoted or bracketed field */ switch (*p) { case '<': closer = '>'; break; case '[': closer = ']'; break; @@ -452,7 +452,7 @@ PUBLIC char * HTNextTok ARGS4( } else break; /* kr95-10-9: needs to stop here */ #if 0 - } else if (*p == '<') { /* quoted field */ + } else if (*p == '<') { /* quoted field */ if (!start) start = ++p; for(;*p && *p!='>'; p++) if (*p == '\\' && *(p+1)) p++; /* Skip escaped chars */ @@ -643,7 +643,7 @@ PRIVATE char * StrAllocVsprintf ARGS4( break; default: CTRACE((tfp, "unknown format character '%c' in %s\n", - *fmt, format)); + *fmt, format)); break; } } else if (*fmt == '.') { @@ -707,7 +707,7 @@ PRIVATE char * StrAllocVsprintf ARGS4( FREE(fmt_ptr); dst_ptr[dst_len] = '\0'; if (pstr) - *pstr = dst_ptr; + *pstr = dst_ptr; return (dst_ptr); } @@ -886,7 +886,7 @@ PUBLIC void HTAddXpand ARGS4( if (HTIsParam(next)) { if (next != last) { size_t len = (next - last) - + ((*result != 0) ? strlen(*result) : 0); + + ((*result != 0) ? strlen(*result) : 0); HTSACat(result, last); (*result)[len] = 0; } @@ -933,7 +933,7 @@ PUBLIC void HTAddParam ARGS4( if (HTIsParam(next)) { if (next != last) { size_t len = (next - last) - + ((*result != 0) ? strlen(*result) : 0); + + ((*result != 0) ? strlen(*result) : 0); HTSACat(result, last); (*result)[len] = 0; } @@ -989,11 +989,11 @@ PUBLIC void HTSABCopy ARGS3( CTRACE((tfp, "HTSABCopy(%p, %p, %d)\n", dest, src, len)); /* if we already have a bstring ** ... */ if (dest) { - /* ... with a valid bstring *, free it ... */ - if (*dest) { + /* ... with a valid bstring *, free it ... 
*/ + if (*dest) { FREE((*dest)->str); FREE(*dest); - } + } *dest = malloc(sizeof(bstring)); if (src) { CTRACE((tfp, "%% [%s]\n", src)); diff --git a/WWW/Library/Implementation/HTString.h b/WWW/Library/Implementation/HTString.h index 3190b88a..e3a83660 100644 --- a/WWW/Library/Implementation/HTString.h +++ b/WWW/Library/Implementation/HTString.h @@ -81,7 +81,7 @@ extern char * HTSprintf () GCC_PRINTFLIKE(2,3); extern char * HTSprintf0 () GCC_PRINTFLIKE(2,3); #endif -#if defined(VMS) || defined(DOSPATH) || defined(__EMX__) +#if (defined(VMS) || defined(DOSPATH) || defined(__EMX__)) && !defined(__CYGWIN__) #define USE_QUOTED_PARAMETER 0 #else #define USE_QUOTED_PARAMETER 1 diff --git a/WWW/Library/Implementation/HTStyle.h b/WWW/Library/Implementation/HTStyle.h index 0cc15895..be958f07 100644 --- a/WWW/Library/Implementation/HTStyle.h +++ b/WWW/Library/Implementation/HTStyle.h @@ -116,6 +116,7 @@ typedef struct _HTStyle { } HTStyle; +#define HT_ALIGN_NONE (-1) /* Style functions: */ diff --git a/WWW/Library/Implementation/HTTCP.c b/WWW/Library/Implementation/HTTCP.c index 9bd4bcd3..18de1134 100644 --- a/WWW/Library/Implementation/HTTCP.c +++ b/WWW/Library/Implementation/HTTCP.c @@ -1180,6 +1180,7 @@ PUBLIC int HTParseInet ARGS2( { char *port; int dotcount_ip = 0; /* for dotted decimal IP addr */ + char *strptr; #ifndef _WINDOWS_NSL char *host = NULL; #endif /* _WINDOWS_NSL */ @@ -1203,17 +1204,18 @@ PUBLIC int HTParseInet ARGS2( */ if ((port = strchr(host, ':')) != NULL) { *port++ = 0; /* Chop off port */ + strptr = port; if (port[0] >= '0' && port[0] <= '9') { #ifdef UNIX - soc_in->sin_port = htons(atol(port)); + soc_in->sin_port = htons(strtol(port, &strptr, 10)); #else /* VMS: */ #ifdef DECNET - soc_in->sdn_objnum = (unsigned char)(strtol(port, (char**)0, 10)); + soc_in->sdn_objnum = (unsigned char)(strtol(port, &strptr, 10)); #else - soc_in->sin_port = htons((unsigned short)strtol(port,(char**)0,10)); + soc_in->sin_port = htons((unsigned 
short)strtol(port,&strptr,10)); #endif /* Decnet */ #endif /* Unix vs. VMS */ -#ifdef SUPPRESS /* 1. crashes!?!. 2. Not recommended */ +#ifdef SUPPRESS /* 1. crashes!?!. 2. URL syntax has number not name */ } else { struct servent * serv = getservbyname(port, (char*)0); if (serv) { @@ -1223,6 +1225,13 @@ PUBLIC int HTParseInet ARGS2( } #endif /* SUPPRESS */ } + if (strptr && *strptr != '\0') { +#ifndef _WINDOWS_NSL + FREE(host); +#endif /* _WINDOWS_NSL */ + HTAlwaysAlert(NULL, gettext("Address has invalid port")); + return -1; + } } #ifdef DECNET @@ -1237,7 +1246,7 @@ PUBLIC int HTParseInet ARGS2( #else /* parse Internet host: */ if (*host >= '0' && *host <= '9') { /* Test for numeric node address: */ - char *strptr = host; + strptr = host; while (*strptr) { if (*strptr == '.') { dotcount_ip++; diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c index cb53e06f..193cf4bd 100644 --- a/WWW/Library/Implementation/HTTP.c +++ b/WWW/Library/Implementation/HTTP.c @@ -537,7 +537,11 @@ try_again: } if (LYUserAgent && *LYUserAgent) { - sprintf(line, "User-Agent: %s%c%c", LYUserAgent, CR, LF); + char *cp = LYSkipBlanks(LYUserAgent); + /* Won't send it at all if all blank - kw */ + if (*cp != '\0') + sprintf(line, "User-Agent: %.*s%c%c", + INIT_LINE_SIZE-15, LYUserAgent, CR, LF); } else { sprintf(line, "User-Agent: %s/%s libwww-FM/%s%c%c", HTAppName ? HTAppName : "unknown", diff --git a/WWW/Library/Implementation/HTUtils.h b/WWW/Library/Implementation/HTUtils.h index 0c6368e6..fa19ea51 100644 --- a/WWW/Library/Implementation/HTUtils.h +++ b/WWW/Library/Implementation/HTUtils.h @@ -384,6 +384,8 @@ are generally not the response status from any specific protocol. 
#define HT_FORBIDDEN -403 /* Access forbidden */ #define HT_NOT_ACCEPTABLE -406 /* Not Acceptable */ +#define HT_PARSER_REOPEN_ELT 700 /* tells SGML parser to keep tag open */ +#define HT_PARSER_OTHER_CONTENT 701 /* tells SGML to change content model */ #define HT_H_ERRNO_VALID -800 /* see h_errno for resolver error */ #define HT_INTERNAL -900 /* Weird -- should never happen. */ diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index 7568f2e2..9b018177 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -138,12 +138,13 @@ struct _HTStream { HTStructured *target; /* target object */ HTTag *current_tag; + HTTag *slashtag; CONST HTTag *unknown_tag; BOOL inSELECT; int current_attribute_number; HTChunk *string; HTElement *element_stack; - enum sgml_state { S_text, S_litteral, + enum sgml_state { S_text, S_tagname_slash, S_pcdata, S_litteral, S_tag, S_tag_gap, S_attr, S_attr_gap, S_equals, S_value, S_ero, S_cro, S_incro, S_exclamation, S_comment, S_doctype, S_marked, @@ -185,14 +186,19 @@ struct _HTStream { char * recover; int recover_index; char * include; + char * active_include; int include_index; char * url; char * csi; int csi_index; +#ifdef USE_PSRC + BOOL cur_attr_is_href; + BOOL cur_attr_is_name; + BOOL seen_nonwhite_in_junk_tag; +#endif }; #ifdef USE_PSRC -static BOOL seen_letter_in_junk_tag; PRIVATE void HTMLSRC_apply_markup ARGS3( HTStream *, context, @@ -236,8 +242,8 @@ PRIVATE void HTMLSRC_apply_markup ARGS3( # define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_/**/x,STOP) #endif -PRIVATE BOOL cur_attr_is_href; -PRIVATE BOOL cur_attr_is_name; +#define attr_is_href context->cur_attr_is_href +#define attr_is_name context->cur_attr_is_name #endif PRIVATE void set_chartrans_handling ARGS3( @@ -248,7 +254,7 @@ PRIVATE void set_chartrans_handling ARGS3( if (chndl < 0) { /* ** Nothing was set for the parser in earlier stages, - ** so the HTML parser's UCLYhndl should still be it's + ** so the 
HTML parser's UCLYhndl should still be its ** default. - FM */ chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED); @@ -371,8 +377,8 @@ PRIVATE void handle_attribute_name ARGS2( #ifdef USE_PSRC if (psrc_view) { - cur_attr_is_href = FALSE; - cur_attr_is_name = FALSE; + attr_is_href = FALSE; + attr_is_name = FALSE; } #endif /* @@ -403,11 +409,11 @@ PRIVATE void handle_attribute_name ARGS2( FREE(context->value[i]); #ifdef USE_COLOR_STYLE # ifdef USE_PSRC -# if !OPT1 +# if !OPT1 current_is_class = (!strcasecomp("class", s)); -# else +# else current_is_class = IS_C(attributes[i]); -# endif +# endif # else current_is_class = (!strcasecomp("class", s)); # endif @@ -416,8 +422,8 @@ PRIVATE void handle_attribute_name ARGS2( #ifdef USE_PSRC } else { - cur_attr_is_name = (BOOL) (attributes[i].type == HTMLA_ANAME); - cur_attr_is_href = (BOOL) (attributes[i].type == HTMLA_HREF); + attr_is_name = (BOOL) (attributes[i].type == HTMLA_ANAME); + attr_is_href = (BOOL) (attributes[i].type == HTMLA_HREF); } #endif return; @@ -775,6 +781,10 @@ PRIVATE void handle_doctype ARGS1( return; } +PRIVATE void SGML_write PARAMS(( + HTStream * me, + CONST char * s, + int l)); /* Handle marked ** ------------- @@ -786,6 +796,19 @@ PRIVATE void handle_marked ARGS1( CTRACE((tfp, "SGML Marked Section:\n<%s>\n", s)); + if (!strncmp(context->string->data, "![INCLUDE[", 10)) { + context->string->data[context->string->size - 3] = '\0'; + StrAllocCat(context->include, context->string->data + 10); + /* @@@ This needs to take charset into account! 
@@@ + the wrong assumptions will be made about the data's + charset once it is in include - kw */ + + } else if (!strncmp(context->string->data, "![CDATA[", 8)) { + (*context->actions->_write)(context->target, + context->string->data + 8, + context->string->size - 11); + + } return; } @@ -878,14 +901,20 @@ PRIVATE void do_close_stacked ARGS1( HTStream *, context) { HTElement * stacked = context->element_stack; + HTMLElement e; if (!stacked) return; /* stack was empty */ if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { context->inSELECT = FALSE; } + e = (stacked->tag == &HTTag_mixedObject) ? + HTML_OBJECT : stacked->tag - context->dtd->tags; +#ifdef USE_PSRC + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ +#endif (*context->actions->end_element)( context->target, - stacked->tag - context->dtd->tags, + e, (char **)&context->include); context->element_stack = stacked->next; FREE(stacked); @@ -898,7 +927,9 @@ PRIVATE int is_on_stack ARGS2( HTElement * stacked = context->element_stack; int i = 1; for (; stacked; stacked = stacked->next, i++) { - if (stacked->tag == old_tag) + if (stacked->tag == old_tag || + (stacked->tag == &HTTag_mixedObject && + old_tag - context->dtd->tags == HTML_OBJECT)) return i; } return 0; @@ -922,6 +953,11 @@ PRIVATE void end_element ARGS2( while (canclose_check != close_NO && context->element_stack && (stackpos > 1 || (!extra_action_taken && stackpos == 0))) { + if (stackpos == 0 && (old_tag->flags & Tgf_startO) && + element_valid_within(old_tag, context->element_stack->tag, YES)) { + CTRACE((tfp, "SGML: </%s> ignored\n", old_tag->name)); + return; + } canclose_check = can_close(old_tag, context->element_stack->tag); if (canclose_check != close_NO) { CTRACE((tfp, "SGML: End </%s> \t<- %s end </%s>\n", @@ -931,11 +967,13 @@ PRIVATE void end_element ARGS2( do_close_stacked(context); extra_action_taken = YES; stackpos = is_on_stack(context, old_tag); +#if 0 /* done below with more specific message - 
kw */ } else { CTRACE((tfp, "SGML: Still open %s \t<- ***invalid end </%s>\n", context->element_stack->tag->name, old_tag->name)); return; +#endif } } @@ -993,8 +1031,11 @@ PRIVATE void end_element ARGS2( if (context->element_stack) /* Substitute and remove one stack element */ #endif /* WIND_DOWN_STACK */ { + int status = HT_OK; + HTMLElement e; HTElement * N = context->element_stack; - HTTag * t = N->tag; + HTTag * t = (N->tag == &HTTag_mixedObject && N->tag != old_tag) ? + context->dtd->tags + HTML_OBJECT : N->tag; if (old_tag != t) { /* Mismatch: syntax error */ if (context->element_stack->next) { /* This is not the last level */ @@ -1007,10 +1048,29 @@ PRIVATE void end_element ARGS2( } } - context->element_stack = N->next; /* Remove from stack */ - FREE(N); - (*context->actions->end_element)(context->target, - t - context->dtd->tags, (char **)&context->include); + e = (t == &HTTag_mixedObject) ? + HTML_OBJECT : t - context->dtd->tags; +#ifdef USE_PSRC + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ +#endif + status = (*context->actions->end_element)(context->target, + e, (char **)&context->include); + if (status == HT_PARSER_REOPEN_ELT) { + CTRACE((tfp, "SGML: Restart <%s>\n", t->name)); + (*context->actions->start_element)( + context->target, + e, + NULL, + NULL, + context->current_tag_charset, + (char **)&context->include); + } else if (status == HT_PARSER_OTHER_CONTENT) { + CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name)); + context->element_stack->tag = &HTTag_mixedObject; + } else { + context->element_stack = N->next; /* Remove from stack */ + FREE(N); + } #ifdef WIND_DOWN_STACK if (old_tag == t) return; /* Correct sequence */ @@ -1031,6 +1091,7 @@ PRIVATE void end_element ARGS2( PRIVATE void start_element ARGS1( HTStream *, context) { + int status; HTTag * new_tag = context->current_tag; #if OPT1 HTMLElement e = new_tag - context->dtd->tags; @@ -1176,13 +1237,15 @@ PRIVATE void start_element ARGS1( ** 
Handle the start tag. - FM */ CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name)); - (*context->actions->start_element)( + status = (*context->actions->start_element)( context->target, new_tag - context->dtd->tags, context->present, (CONST char**) context->value, /* coerce type for think c */ context->current_tag_charset, (char **)&context->include); + if (status == HT_PARSER_OTHER_CONTENT) + new_tag = &HTTag_mixedObject; /* this is only returned for OBJECT */ if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ HTElement * N = (HTElement *)malloc(sizeof(HTElement)); if (N == NULL) @@ -1230,7 +1293,7 @@ PUBLIC HTTag * SGMLFindTag ARGS2( return &dtd->tags[i]; } } - if (isalpha((unsigned char)string[0])) { + if (IsNmStart(string[0])) { /* ** Unrecognized, but may be valid. - KW */ @@ -1261,6 +1324,7 @@ PRIVATE void SGML_free ARGS1( FREE(context->url); FREE(context->csi); FREE(context->include); + FREE(context->active_include); /* ** Wind down stack if any elements are open. - FM @@ -1270,8 +1334,13 @@ PRIVATE void SGML_free ARGS1( t = cur->tag; context->element_stack = cur->next; /* Remove from stack */ FREE(cur); - (*context->actions->end_element)(context->target, - t - context->dtd->tags, (char **)&context->include); +#ifdef USE_PSRC + if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ +#endif + (*context->actions->end_element)(context->target, + (t == &HTTag_mixedObject) ? + HTML_OBJECT : t - context->dtd->tags, + (char **)&context->include); FREE(context->include); } @@ -1310,6 +1379,7 @@ PRIVATE void SGML_abort ARGS2( */ FREE(context->recover); FREE(context->include); + FREE(context->active_include); FREE(context->url); FREE(context->csi); @@ -1368,6 +1438,7 @@ PRIVATE void SGML_character ARGS2( HTChunk *string = context->string; CONST char * EntityName; char * p; + HTTag * testtag = NULL; BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ UCode_t clong, uck = 0; /* Enough bits for UCS4 ... 
*/ #ifdef CJK_EX @@ -1561,6 +1632,9 @@ PRIVATE void SGML_character ARGS2( ** its recover buffer, but it might not be for ** stuff other functions added to the insert or ** csi buffer, so bear that in mind. - FM +** Stuff from the recover buffer is now handled +** as UTF-8 if we can expect that's what it is, +** and in that case we don't come back up here. - kw */ top: saved_char_in = '\0'; @@ -1595,7 +1669,7 @@ top1: if (TOASCII(unsign_c) < 32 && c != '\t' && c != '\n' && c != '\r' && HTCJK == NOCJK) - return; + goto after_switch; #ifdef CJK_EX /* 1998/11/24 (Tue) 17:02:31 */ if (HTCJK == JAPANESE && last_kcode == SJIS) { @@ -1609,7 +1683,7 @@ top1: JISx0201TO0208_SJIS(c, &sjis_hi, &sjis_lo); PUTC(sjis_hi); PUTC(sjis_lo); - return; + goto after_switch; } } } @@ -1624,7 +1698,7 @@ top1: unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ !(PASSHICTRL || HTCJK != NOCJK)) - return; + goto after_switch; /* ** Ignore 8-bit control characters 128 - 159 if @@ -1632,7 +1706,7 @@ top1: */ if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ !(PASSHICTRL || HTCJK != NOCJK)) - return; + goto after_switch; /* ** Handle character based on context->state. @@ -1654,6 +1728,11 @@ top1: PUTC(c); break; + case S_tagname_slash: + if (!(c == '>' && context->slashtag && TOASCII(unsign_c) < 127)) { + context->state = S_text; + } + case S_text: if (HTCJK != NOCJK && (TOASCII(c) & 0200) != 0) { /* S/390 -- gil -- 0864 */ /* @@ -1678,13 +1757,26 @@ top1: PUTC(c); break; } + + if (c == '&' || c == '<') { +#ifdef USE_PSRC + if (psrc_view) { /*there is nothing useful in the element_stack*/ + testtag = context->current_tag; + } else +#endif + testtag = context->element_stack ? 
+ context->element_stack->tag : NULL; + } + if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ - (!context->element_stack || - (context->element_stack->tag && - (context->element_stack->tag->contents == SGML_MIXED || - context->element_stack->tag->contents == SGML_ELEMENT || - context->element_stack->tag->contents == SGML_PCDATA || - context->element_stack->tag->contents == SGML_RCDATA)))) { + (!testtag || + (testtag->contents == SGML_MIXED || + testtag->contents == SGML_ELEMENT || + testtag->contents == SGML_PCDATA || +#ifdef USE_PSRC + testtag->contents == SGML_EMPTY || +#endif + testtag->contents == SGML_RCDATA))) { /* ** Setting up for possible entity, without the leading '&'. - FM */ @@ -1695,17 +1787,75 @@ top1: ** Setting up for possible tag. - FM */ string->size = 0; - context->state = (context->element_stack && - context->element_stack->tag && - context->element_stack->tag->contents == SGML_LITTERAL) - ? - S_litteral : S_tag; + if (testtag && testtag->contents == SGML_PCDATA) + context->state = S_pcdata; + else if (testtag && (testtag->contents == SGML_LITTERAL || + testtag->contents == SGML_CDATA)) + context->state = S_litteral; + else + context->state = S_tag; + context->slashtag = NULL; + } else if (context->slashtag && + (c == '/' || + (c == '>' && context->state == S_tagname_slash)) && + TOASCII(unsign_c) < 127) { + /* + ** Pending tag close from shortref tag. 
- kw + */ #ifdef USE_PSRC - if (psrc_view) /*there is nothing useful in the element_stack*/ - if (context->current_tag && - context->current_tag->contents == SGML_LITTERAL) - context->state = S_litteral; + if (psrc_view) { + PSRCSTART(abracket); + PUTC(c); + PSRCSTOP(abracket); + } else #endif + if (context->slashtag != context->unknown_tag && + !ReallyEmptyTag(context->slashtag)) { + if (context->recover == NULL) { + StrAllocCopy(context->recover, "</"); + context->recover_index = 0; + } else { + StrAllocCat(context->recover, "</"); + } + StrAllocCat(context->recover, context->slashtag->name); + StrAllocCat(context->recover, ">"); + } + context->slashtag = NULL; + + } else if (context->element_stack && + (context->element_stack->tag->flags & Tgf_frecyc)) { + /* + * The element stack says we are within the contents of an + * element that the next stage (HTML.c) may want to feed + * us back again (via the *include string). So try to output + * text in UTF-8 if possible, using the same logic as for + * attribute values (which should be in line with what + * context->current_tag_charset indicates). - kw + */ + if (context->T.decode_utf8 && + *context->utf_buf) { + PUTS(context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (HTCJK == NOCJK && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (LYIsASCII(clong)) { + PUTC(c); + } else if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + (unsigned char)saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + PUTUTF8((0xf000 | (unsigned char)saved_char_in)); + } else { + PUTUTF8(clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + PUTC(saved_char_in); + } else { + PUTC(c); + } + #define PASS8859SPECL context->T.pass_160_173_raw /* ** Convert 160 (nbsp) to Lynx special character if @@ -1900,25 +2050,50 @@ top1: break; /* + ** Found '<' in SGML_PCDATA content; treat this mode nearly like + ** S_litteral, but recognize '<!' 
to filter out comments etc. - kw + */ + case S_pcdata: + if (c == '!' && !string->size) { /* <! */ + /* + ** Terminate and set up for possible comment, + ** identifier, declaration, or marked section + ** as under S_tag. - kw + */ + context->state = S_exclamation; + context->lead_exclamation = TRUE; + context->doctype_bracket = FALSE; + context->first_bracket = FALSE; + HTChunkPutc(string, c); + break; + } + /* Fall through to S_litteral - kw */ + + /* ** In litteral mode, waits only for specific end tag (for ** compatibility with old servers, and for Lynx). - FM */ case S_litteral: /*PSRC:this case not understood completely by HV, not done*/ HTChunkPutc(string, c); - if (TOUPPER(c) != ((string->size == 1) ? - '/' : #ifdef USE_PSRC - psrc_view ? context->current_tag->name[string->size-2]: + if (psrc_view) { /*there is nothing useful in the element_stack*/ + testtag = context->current_tag; + } else #endif - context->element_stack->tag->name[string->size-2])) { + testtag = context->element_stack ? + context->element_stack->tag : NULL; + + if (TOUPPER(c) != ((string->size == 1) ? + '/' : + testtag->name[string->size-2])) { int i; /* ** If complete match, end litteral. 
*/ + if ((c == '>') && testtag && !testtag->name[string->size-2]) { #ifdef USE_PSRC - if (psrc_view) { - if ((c == '>') && context->current_tag && !context->current_tag->name[string->size-2]) { + if (psrc_view) { PSRCSTART(abracket);PUTC('<');PUTC('/');PSRCSTOP(abracket); PSRCSTART(tag); strcpy(string->data,context->current_tag->name); @@ -1938,24 +2113,37 @@ top1: context->state = S_text; break; } - PUTC('<'); - for (i = 0; i < string->size-1; i++) /* recover, except last c */ - PUTC(string->data[i]); - string->size = 0; - context->state = S_text; - goto top1; /* to recover last c */ - } else #endif - if ((c == '>') && - (!context->element_stack->tag->name[string->size-2])) { end_element(context, context->element_stack->tag); string->size = 0; context->current_attribute_number = INVALID; context->state = S_text; break; } + + if (((testtag->contents != SGML_LITTERAL && + (testtag->flags & Tgf_strict)) || + (context->state == S_pcdata && + (testtag->flags & (Tgf_strict|Tgf_endO)))) && + (string->size > 1 && + (c == '>' || string->size > 2 || IsNmStart(c)))) { + context->state = S_end; + string->size--; + for (i = 0; i < string->size; i++) /* remove '/' */ + string->data[i] = string->data[i+1]; + if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c)) + break; + string->size--; + goto top1; + } + if (context->state == S_pcdata && + (testtag->flags & (Tgf_strict|Tgf_endO)) && + (string->size == 1 && IsNmStart(c))) { + context->state = S_tag; + break; + } /* - ** If Mismatch: recover string. + ** If Mismatch: recover string literally. */ PUTC('<'); for (i = 0; i < string->size-1; i++) /* recover, except last c */ @@ -1984,7 +2172,7 @@ top1: */ case S_entity: if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ - isalnum((unsigned char)c) : isalpha((unsigned char)c))) { + IsNmChar(c) : isalpha((unsigned char)c))) { /* ** Accept valid ASCII character. 
- FM */ @@ -2631,7 +2819,7 @@ top1: */ case S_tag: /* new tag */ if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1179 */ - isalnum((unsigned char)c) : isalpha((unsigned char)c))) { + IsNmChar(c) : IsNmStart(c))) { /* ** Add valid ASCII character. - FM */ @@ -2675,28 +2863,29 @@ top1: */ HTTag * t; if (c == '/') { - if (string->size != 0) - CTRACE((tfp,"SGML: `<%s/' found!\n", string->data)); - context->state = S_end; - break; + if (string->size == 0) { + context->state = S_end; + break; + } + CTRACE((tfp,"SGML: `<%.*s/' found!\n", string->size, string->data)); } HTChunkTerminate(string) ; t = SGMLFindTag(dtd, string->data); - if (t == context->unknown_tag && c == ':' && - 0 == strcasecomp(string->data, "URL")) { + if (t == context->unknown_tag && + ((c == ':' && + string->size == 4 && 0 == strcasecomp(string->data, "URL")) || + (string->size > 4 && 0 == strncasecomp(string->data, "URL:", 4)))) { /* ** Treat <URL: as text rather than a junk tag, ** so we display it and the URL (Lynxism 8-). - FM */ - int i; #ifdef USE_PSRC if (psrc_view) PSRCSTART(badseq); #endif PUTC('<'); - for (i = 0; i < 3; i++) /* recover */ - PUTC(string->data[i]); + PUTS(string->data); /* recover */ PUTC(c); #ifdef USE_PSRC if (psrc_view) @@ -2707,7 +2896,17 @@ top1: string->size = 0; context->state = S_text; break; - } else if (!t) { + } + if (c == '/' && t) { + /* + * Element name was ended by '/'. Remember the tag that + * ended thusly, we'll interpret as either an indication + * of an empty element (if '>' follows directly) or do + * some SGMLshortref-ish treatment. 
- kw + */ + context->slashtag = t; + } + if (!t) { CTRACE((tfp, "SGML: *** Invalid element %s\n", string->data)); @@ -2777,24 +2976,30 @@ top1: context->current_attribute_number = INVALID; #ifdef USE_PSRC if (psrc_view) { - if (c == '>') { + if (c == '>' || c == '<' || (c == '/' && context->slashtag)) { if (t != context->unknown_tag) PSRCSTOP(tag); else PSRCSTOP(badtag); - PSRCSTART(abracket); - PUTC('>'); - PSRCSTOP(abracket); - context->state = S_text; + if (c != '<') { + PSRCSTART(abracket); + PUTC(c); + PSRCSTOP(abracket); + context->state = (c == '>') ? S_text : S_tagname_slash; + } else + context->state = S_tag; } else { + if (!WHITE(c)) + PUTC(c); context->state = S_tag_gap; } } else #endif - if (c == '>') { + if (c == '>' || c == '<' || (c == '/' && context->slashtag)) { if (context->current_tag->name) start_element(context); - context->state = S_text; + context->state = (c == '>') ? S_text : + (c == '<') ? S_tag : S_tagname_slash; } else { context->state = S_tag_gap; } @@ -3331,18 +3536,18 @@ top1: #ifdef USE_PSRC if (psrc_view) { /*PSRCSTART(attrval);*/ - if (cur_attr_is_name) { + if (attr_is_name) { HTStartAnchor(context->target, string->data, NULL); (*context->actions->end_element)( context->target, HTML_A, (char **)&context->include); - } else if (cur_attr_is_href) { + } else if (attr_is_href) { PSRCSTART(href); HTStartAnchor(context->target,NULL,string->data); } PUTS_TR(string->data); - if (cur_attr_is_href) { + if (attr_is_href) { (*context->actions->end_element)( context->target, HTML_A, @@ -3413,18 +3618,18 @@ top1: #ifdef USE_PSRC if (psrc_view) { /*PSRCSTART(attrval);*/ - if (cur_attr_is_name) { + if (attr_is_name) { HTStartAnchor(context->target,string->data,NULL); (*context->actions->end_element)( context->target, HTML_A, (char **)&context->include); - } else if (cur_attr_is_href) { + } else if (attr_is_href) { PSRCSTART(href); HTStartAnchor(context->target,NULL,string->data); } PUTS_TR(string->data); - if (cur_attr_is_href) { + if 
(attr_is_href) { (*context->actions->end_element)( context->target, HTML_A, @@ -3477,18 +3682,18 @@ top1: #ifdef USE_PSRC if (psrc_view) { /*PSRCSTART(attrval);*/ - if (cur_attr_is_name) { + if (attr_is_name) { HTStartAnchor(context->target,string->data,NULL); (*context->actions->end_element)( context->target, HTML_A, (char **)&context->include); - } else if (cur_attr_is_href) { + } else if (attr_is_href) { PSRCSTART(href); HTStartAnchor(context->target,NULL,string->data); } PUTS_TR(string->data); - if (cur_attr_is_href) { + if (attr_is_href) { (*context->actions->end_element)( context->target, HTML_A, @@ -3537,7 +3742,8 @@ top1: break; case S_end: /* </ */ - if (TOASCII(unsign_c) < 127 && isalnum((unsigned char)c)) { /* S/390 -- gil -- 1247 */ + if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1247 */ + IsNmChar(c) : IsNmStart(c))) { HTChunkPutc(string, c); } else { /* End of end tag name */ HTTag * t = 0; @@ -3576,10 +3782,14 @@ top1: } else { BOOL tag_OK = (BOOL) (c == '>' || WHITE(c)); #if OPT - HTMLElement e = context->current_tag - context->dtd->tags; + HTMLElement e = t - context->dtd->tags; int branch = 2; /* it can be 0,1,2*/ #endif context->current_tag = t; + if (t - context->dtd->tags == HTML_OBJECT && + context->element_stack && + context->element_stack->tag == &HTTag_mixedObject) + context->element_stack->tag = t; #if OPT if (tag_OK #ifdef EXTENDED_HTMLDTD @@ -3632,13 +3842,14 @@ top1: ** Don't treat these end tags as invalid, ** nor act on them. - FM */ - CTRACE((tfp, "SGML: `</%s%c' found! ***Ignoring it.\n", + CTRACE((tfp, "SGML: `</%s%c' found! 
Ignoring it.\n", string->data, c)); string->size = 0; context->current_attribute_number = INVALID; if (c != '>') { context->state = S_junk_tag; } else { + context->current_tag = NULL; context->state = S_text; } break; @@ -3688,6 +3899,9 @@ top1: SGMLFindTag(context->dtd, "SELECT")); CTRACE((tfp, "SGML: End </%s>\n", string->data)); +#ifdef USE_PSRC + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ +#endif (*context->actions->end_element) (context->target, (context->current_tag - context->dtd->tags), @@ -3698,7 +3912,7 @@ top1: ** Treat a P end tag like a P start tag (Ugh, ** what a hack! 8-). - FM */ - CTRACE((tfp, "SGML: `</%s%c' found! ***Treating as '<%s%c'.\n", + CTRACE((tfp, "SGML: `</%s%c' found! Treating as '<%s%c'.\n", string->data, c, string->data, c)); { int i; @@ -3713,6 +3927,9 @@ top1: } else { CTRACE((tfp, "SGML: End </%s>\n", string->data)); +#ifdef USE_PSRC + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ +#endif (*context->actions->end_element) (context->target, (context->current_tag - context->dtd->tags), @@ -3723,6 +3940,7 @@ top1: if (c != '>') { context->state = S_junk_tag; } else { + context->current_tag = NULL; context->state = S_text; } break; @@ -3765,6 +3983,7 @@ top1: CTRACE((tfp,"SGML: `</%s%c' found!\n", string->data, c)); context->state = S_junk_tag; } else { + context->current_tag = NULL; context->state = S_text; } } @@ -3920,17 +4139,18 @@ top1: PSRCSTART(abracket); PUTC('>'); PSRCSTOP(abracket); - seen_letter_in_junk_tag = FALSE; + context->seen_nonwhite_in_junk_tag = FALSE; } #endif + context->current_tag = NULL; context->state = S_text; } #ifdef USE_PSRC else if (psrc_view) { /*pack spaces until first non-space is seen*/ - if (!seen_letter_in_junk_tag) { + if (!context->seen_nonwhite_in_junk_tag) { if (!WHITE(c)) { - seen_letter_in_junk_tag = TRUE; + context->seen_nonwhite_in_junk_tag = TRUE; PUTC(c); } } else @@ -3940,6 +4160,27 @@ top1: } /* switch on context->state */ +after_switch: + /* + ** 
Check whether an external function has added + ** anything to the include buffer. If so, move the + ** new stuff to the beginning of active_include. - kw + */ + if (context->include != NULL) { + if (context->include[0] == '\0') { + FREE(context->include); + } else { + if (context->active_include && + context->active_include[context->include_index] != '\0') + StrAllocCat(context->include, + context->active_include + context->include_index); + FREE(context->active_include); + context->active_include = context->include; + context->include_index = 0; + context->include = NULL; + } + } + /* ** Check whether we've added anything to the recover buffer. - FM */ @@ -3955,17 +4196,42 @@ top1: } /* - ** Check whether an external function has added - ** anything to the include buffer. - FM + ** Check whether an external function had added + ** anything to the include buffer; it should now be + ** in active_include. - FM / kw */ - if (context->include != NULL) { - if (context->include[context->include_index] == '\0') { - FREE(context->include); + if (context->active_include != NULL) { + if (context->active_include[context->include_index] == '\0') { + FREE(context->active_include); context->include_index = 0; } else { - c = context->include[context->include_index]; - context->include_index++; - goto top; + if (context->current_tag_charset == UTF8 || + context->T.trans_from_uni) { + /* + * If it looks like we would have fed UTF-8 to the + * next processing stage, assume that whatever we were + * fed back is in UTF-8 form, too. This won't be always + * true for all uses of the include buffer, but it's a + * start. 
- kw + */ + char *puni = context->active_include + context->include_index; + c = *puni; + clong = UCGetUniFromUtf8String(&puni); + if (clong < 256 && clong >= 0) { + c = ((char)(clong & 0xff)); + } + saved_char_in = '\0'; + context->include_index = puni - context->active_include + 1; + goto top1; + } else { + /* + * Otherwise assume no UTF-8 - do charset-naive processing + * and hope for the best. - kw + */ + c = context->active_include[context->include_index]; + context->include_index++; + goto top; + } } } @@ -4049,6 +4315,7 @@ PUBLIC HTStream* SGML_new ARGS3( context->actions = (CONST HTStructuredClass*)(((HTStream*)target)->isa); /* Ugh: no OO */ context->unknown_tag = &HTTag_unrecognized; + context->current_tag = context->slashtag = NULL; context->state = S_text; context->element_stack = 0; /* empty */ context->inSELECT = FALSE; @@ -4089,6 +4356,7 @@ PUBLIC HTStream* SGML_new ARGS3( context->recover = NULL; context->recover_index = 0; context->include = NULL; + context->active_include = NULL; context->include_index = 0; context->url = NULL; context->csi = NULL; @@ -4103,7 +4371,7 @@ PUBLIC HTStream* SGML_new ARGS3( psrc_view = TRUE; psrc_convert_string = FALSE; sgml_in_psrc_was_initialized = TRUE; - seen_letter_in_junk_tag = FALSE; + context->seen_nonwhite_in_junk_tag = FALSE; } #endif diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h index 1a1334ba..85278f44 100644 --- a/WWW/Library/Implementation/SGML.h +++ b/WWW/Library/Implementation/SGML.h @@ -30,12 +30,18 @@ typedef enum _SGMLContent { SGML_EMPTY, /* No content. */ SGML_LITTERAL, /* Literal character data. Recognize exact close tag only. Old www server compatibility only! Not SGML */ - SGML_CDATA, /* Character data. Recognize </ only. */ - SGML_RCDATA, /* Replaceable character data. Recognize </ and &ref; */ + SGML_CDATA, /* Character data. Recognize </ only. + (But we treat it just as SGML_LITTERAL.) */ + SGML_RCDATA, /* Replaceable character data. 
Should recognize </ and &ref; + (but we treat it like SGML_MIXED for old times' sake). */ SGML_MIXED, /* Elements and parsed character data. Recognize all markup. */ - SGML_ELEMENT, /* Any data found will be returned as an error. */ - SGML_PCDATA /* Added by KW. */ + SGML_ELEMENT, /* Any data found should be regarded as an error. + (But we treat it just like SGML_MIXED.) */ + SGML_PCDATA /* Should contain no elements but &ref; is parsed. + (We treat it like SGML_CDATA wrt. contained tags + i.e. pass them on literally, i.e. like we should + treat SGML_RCDATA) (added by KW). */ } SGMLContent; @@ -70,7 +76,7 @@ typedef int TagClass; text directly */ /* insertions */ #define Tgc_BRlike 0x01000 /* BR,IMG,TAB allowed in any text */ -#define Tgc_APPLETlike 0x02000 /* APPLET,OBJECT,EMBED,SCRIPT */ +#define Tgc_APPLETlike 0x02000 /* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */ #define Tgc_HRlike 0x04000 /* HR,MARQUEE can contain all kinds of things and/or are not allowed (?) in running text */ #define Tgc_MAPlike 0x08000 /* MAP,AREA some specials that never contain @@ -90,9 +96,14 @@ typedef int TagFlags; #define Tgf_mafse 0x00004 /* Make Attribute-Free Start-tag End instead (if found invalid) */ #define Tgf_strict 0x00008 /* Ignore contained invalid elements, - don't pass them on */ + don't pass them on; or other variant + handling for some content types */ #define Tgf_nreie 0x00010 /* Not Really Empty If Empty, used by color style code */ +#define Tgf_frecyc 0x00020 /* Pass element content on in a form that + allows recycling, i.e. don't translate to + output (display) character set yet (treat + content similar to attribute values) */ /* A tag structure describes an SGML element. 
** ----------------------------------------- @@ -200,7 +211,7 @@ typedef struct _HTStructuredClass{ CONST char * str, int len)); - void (*start_element) PARAMS(( + int (*start_element) PARAMS(( HTStructured* me, int element_number, CONST BOOL* attribute_present, @@ -208,7 +219,7 @@ typedef struct _HTStructuredClass{ int charset, char ** include)); - void (*end_element) PARAMS(( + int (*end_element) PARAMS(( HTStructured* me, int element_number, char ** include)); diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h index 50cdf9ba..44c9e881 100644 --- a/WWW/Library/Implementation/UCAux.h +++ b/WWW/Library/Implementation/UCAux.h @@ -34,7 +34,7 @@ typedef struct _UCTransParams UCTransParams; #endif /* UCDEFS_H */ extern void UCSetTransParams PARAMS(( - UCTransParams * pT, + UCTransParams * pT, int cs_in, CONST LYUCcharset * p_in, int cs_out, @@ -66,9 +66,11 @@ extern BOOL UCPutUtf8_charstring PARAMS(( HTStream * target, putc_func_t * actions, UCode_t code)); - + extern BOOL UCConvertUniToUtf8 PARAMS(( UCode_t code, char * buffer)); +extern UCode_t UCGetUniFromUtf8String PARAMS((char ** ppuni)); + #endif /* UCAUX_H */ |