diff options
author | Thomas E. Dickey <dickey@invisible-island.net> | 1997-11-07 12:30:00 -0500 |
---|---|---|
committer | Thomas E. Dickey <dickey@invisible-island.net> | 1997-11-07 12:30:00 -0500 |
commit | b63d287c6f3e67f8574ca2155c661288bc7dcd05 (patch) | |
tree | 6a3b376424faf4d50058e91988c2d6eaa49cfbdc /WWW/Library | |
parent | 8f8c57cc7c0e876cd291e2b4de23a52e060b30ba (diff) | |
download | lynx-snapshots-b63d287c6f3e67f8574ca2155c661288bc7dcd05.tar.gz |
snapshot of project "lynx", label v2-7-1ac_0-93
Diffstat (limited to 'WWW/Library')
-rw-r--r-- | WWW/Library/Implementation/HTAccess.c | 42 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTAnchor.c | 22 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTAnchor.h | 9 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTDOS.c | 8 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTFile.c | 135 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMIME.c | 150 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 6 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTNews.c | 92 | ||||
-rw-r--r-- | WWW/Library/Implementation/HTParse.c | 20 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.c | 10 | ||||
-rw-r--r-- | WWW/Library/Implementation/SGML.h | 4 |
11 files changed, 381 insertions, 117 deletions
diff --git a/WWW/Library/Implementation/HTAccess.c b/WWW/Library/Implementation/HTAccess.c index 6dedbeb9..879b34c9 100644 --- a/WWW/Library/Implementation/HTAccess.c +++ b/WWW/Library/Implementation/HTAccess.c @@ -632,6 +632,8 @@ PUBLIC HTStream *HTSaveStream ARGS1( return (*p->saveStream)(anchor); } +extern char LYinternal_flag; /* from LYMainLoop.c */ + /* Load a document - with logging etc HTLoadDocument() ** ---------------------------------- ** @@ -695,6 +697,26 @@ PRIVATE BOOL HTLoadDocument ARGS4( } /* + * If this is marked as an internal link but we don't have the + * document loaded any more, and we haven't explicitly flagged + * that we want to reload with LYforce_no_cache, then something + * has disappeared from the cache when we expected it to be still + * there. The user probably doesn't expect a new network access. + * So if we have POST data and safe is not set in the anchor, + * ask for confirmation, and fail if not granted. The exception + * are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap + * for prompting if necessary. - kw + */ + if (LYinternal_flag && !LYforce_no_cache && + anchor->post_data && !anchor->safe && + (text = (HText *)HTAnchor_document(anchor)) == NULL && + strncmp(full_address, "LYNXIMGMAP:", 11) && + HTConfirm("Document with POST content not found in cache. Resubmit?") + != TRUE) { + return NO; + } + + /* ** If we don't have POST content, check whether this is a previous ** redirecting URL, and keep re-checking until we get to the final ** destination or redirection limit. If we do have POST content, @@ -783,13 +805,16 @@ PRIVATE BOOL HTLoadDocument ARGS4( * testing whether we are just repositioning. For an internal * link, the potential callers of this function from mainloop() * down will either avoid making the call (and do the repositioning - * differently) or set LYoverride_no_cache. + * differently) or set LYinternal_flag (or LYoverride_no_cache). * Note that (a) LYNXIMGMAP pseudo-documents and (b) The "List Page" * document are treated logically as being part of the document on * which they are based, for the purpose of whether to treat a link - * as internal, but the logic for this (by setting LYoverride_no_cache - * as necessary) is implemented elsewhere. For LYNXIMGMAP the same - * caveat as above applies. + * as internal, but the logic for this (by setting LYinternal_flag + * as necessary) is implemented elsewhere. There is a specific + * test for LYNXIMGMAP here so that the generated pseudo-document + * will not be re-used unless LYoverride_no_cache is set. The same + * caveat as above applies w.r.t. reloading of the underlying + * resource. * ** We also should be checking other aspects of cache ** regulation (e.g., based on an If-Modified-Since check, @@ -800,7 +825,9 @@ PRIVATE BOOL HTLoadDocument ARGS4( if (LYoverride_no_cache || !HText_hasNoCacheSet(text) || !HText_AreDifferent(anchor, full_address)) #else - if (LYoverride_no_cache || !HText_hasNoCacheSet(text)) + if (LYoverride_no_cache || + ((LYinternal_flag || !HText_hasNoCacheSet(text)) && + strncmp(full_address, "LYNXIMGMAP:", 11))) #endif /* TRACK_INTERNAL_LINKS */ { if (TRACE) @@ -814,7 +841,12 @@ PRIVATE BOOL HTLoadDocument ARGS4( redirection_attempts = 0; return YES; } else { +#if NOT_USED_CODE + /* disabled 1997-10-28 - kw + callers already do this when requested + */ reloading = TRUE; +#endif ForcingNoCache = YES; if (TRACE) fprintf(stderr, "HTAccess: Auto-reloading document.\n"); diff --git a/WWW/Library/Implementation/HTAnchor.c b/WWW/Library/Implementation/HTAnchor.c index d3c3f34e..c2cf273c 100644 --- a/WWW/Library/Implementation/HTAnchor.c +++ b/WWW/Library/Implementation/HTAnchor.c @@ -238,11 +238,21 @@ PUBLIC HTChildAnchor * HTAnchor_findChildAndLink ARGS4( HTAnchor * dest; parsed_doc.address = HTParse(href, relative_to, PARSE_ALL); - parsed_doc.post_data = NULL; - parsed_doc.post_content_type = NULL; - parsed_doc.bookmark = NULL; - parsed_doc.isHEAD = FALSE; - parsed_doc.safe = FALSE; +#ifndef DONT_TRACK_INTERNAL_LINKS + if (ltype && parent->post_data && ltype == LINK_INTERNAL) { + /* for internal links, find a destination with the same + post data if the source of the link has post data. - kw */ + parsed_doc.post_data = parent->post_data; + parsed_doc.post_content_type = parent->post_content_type; + } else +#endif + { + parsed_doc.post_data = NULL; + parsed_doc.post_content_type = NULL; + } + parsed_doc.bookmark = NULL; + parsed_doc.isHEAD = FALSE; + parsed_doc.safe = FALSE; dest = HTAnchor_findAddress(&parsed_doc); HTAnchor_link((HTAnchor *)child, dest, ltype); @@ -690,6 +700,8 @@ PUBLIC BOOL HTAnchor_delete ARGS1( #ifdef EXP_CHARTRANS FREE (me->UCStages); #endif + ImageMapList_free(me->imaps); + /* * Finally, kill the parent anchor passed in. diff --git a/WWW/Library/Implementation/HTAnchor.h b/WWW/Library/Implementation/HTAnchor.h index 458cac40..b86c372c 100644 --- a/WWW/Library/Implementation/HTAnchor.h +++ b/WWW/Library/Implementation/HTAnchor.h @@ -125,9 +125,8 @@ struct _HTParentAnchor { char * expires; /* Expires */ char * last_modified; /* Last-Modified */ char * server; /* Server */ -#ifdef EXP_CHARTRANS - UCAnchorInfo * UCStages; -#endif + UCAnchorInfo * UCStages; /* chartrans stages */ + HTList * imaps; /* client side image maps */ }; typedef struct { @@ -411,7 +410,6 @@ extern void HTAnchor_setPhysical PARAMS(( #endif /* HTANCHOR_H */ -#ifdef EXP_CHARTRANS extern LYUCcharset * HTAnchor_getUCInfoStage PARAMS((HTParentAnchor * me, int which_stage)); @@ -433,7 +431,8 @@ extern LYUCcharset * HTAnchor_copyUCInfoStage PARAMS((HTParentAnchor * me, int to_stage, int from_stage, int set_by)); -#endif + +extern void ImageMapList_free PARAMS((HTList * list)); /* */ diff --git a/WWW/Library/Implementation/HTDOS.c b/WWW/Library/Implementation/HTDOS.c index c5687808..1e67a1a0 100644 --- a/WWW/Library/Implementation/HTDOS.c +++ b/WWW/Library/Implementation/HTDOS.c @@ -51,7 +51,7 @@ char * HTDOS_wwwName (char *dosname) ** ON EXIT: ** returns dos file specification ** -** Bug: Returns pointer to static -- non-reentrant +** Bug(?): Returns pointer to input string, which is modified */ char * HTDOS_name(char *dosname) { @@ -82,13 +82,15 @@ char * HTDOS_name(char *dosname) { printf("\n\n%s = i%\n\n",cp_url,strlen(cp_url)); sleep(5); /**/ - return(cp_url); + strcpy(dosname, cp_url); + return(dosname); /* return(cp_url); */ } else { /* printf("\n\n%s = %i\n\n",cp_url+1,strlen(cp_url)); sleep(5); /**/ - return(cp_url+1); + strcpy(dosname, cp_url+1); + return(dosname); /* return(cp_url+1); */ } } diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c index 3ec307fc..45bd6464 100644 --- a/WWW/Library/Implementation/HTFile.c +++ b/WWW/Library/Implementation/HTFile.c @@ -74,11 +74,9 @@ #include "HTBTree.h" #include "HTAlert.h" #include "HTCJK.h" -#ifdef EXP_CHARTRANS #include "UCDefs.h" #include "UCMap.h" #include "UCAux.h" -#endif /* EXP_CHARTRANS */ #include "LYexit.h" #include "LYLeaks.h" @@ -130,11 +128,9 @@ PUBLIC int HTDirReadme = HT_DIR_README_TOP; extern int current_char_set; extern CONST char *LYchar_set_names[]; +extern BOOLEAN LYRawMode; extern BOOL HTPassEightBitRaw; extern HTCJKlang HTCJK; -#ifndef EXP_CHARTRANS -#define UCLYhndl_HTFile_for_unspec 0 /* a dummy define */ -#endif PRIVATE char *HTMountRoot = "/Net/"; /* Where to find mounts */ #ifdef VMS @@ -573,7 +569,16 @@ PUBLIC char * HTLocalName ARGS1( if (TRACE) fprintf(stderr, "Node `%s' means path `%s'\n", name, path); #ifdef DOSPATH - return(HTDOS_name(path)); + { + char *ret_path = NULL; + StrAllocCopy(ret_path, HTDOS_name(path)); + if (TRACE) { + fprintf(stderr, "HTDOS_name changed `%s' to `%s'\n", + path, ret_path); + } + FREE(path); + return(ret_path); + } #else return(path); #endif /* DOSPATH */ @@ -686,12 +691,12 @@ PUBLIC CONST char * HTFileSuffix ARGS2( suff = (HTSuffix *)HTList_objectAt(HTSuffixes, i); if (suff->rep == rep && #if defined(VMS) || defined(FNAMES_8_3) - /* Don't return a suffix whose first char is a dot and which - has more dots or with asterisks, for + /* Don't return a suffix whose first char is a dot, and which + has more dots or asterisks after that, for these systems - kw */ (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' || (strchr(suff->suffix + 1, '.') == NULL && - strchr(suff->suffix + 1, '.') == NULL)) && + strchr(suff->suffix + 1, '*') == NULL)) && #endif ((trivial_enc && IsUnityEnc(suff->encoding)) || (!trivial_enc && !IsUnityEnc(suff->encoding) && @@ -859,7 +864,6 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( cp2 += 7; while (*cp2 == ' ' || *cp2 == '=') cp2++; -#ifdef EXP_CHARTRANS StrAllocCopy(cp3, cp2); /* copy to mutilate more */ for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' && *cp4 != ';' && *cp4 != ':' && @@ -885,7 +889,8 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( format = HTAtom_for(cp); StrAllocCopy(anchor->charset, cp4); HTAnchor_setUCInfoStage(anchor, chndl, - UCT_STAGE_MIME, UCT_SETBY_MIME); + UCT_STAGE_MIME, + UCT_SETBY_MIME); } else if (chndl < 0) { /* ** Got something but we don't recognize it. @@ -894,10 +899,10 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( if (UCCanTranslateFromTo(chndl, current_char_set)) { chartrans_ok = YES; HTAnchor_setUCInfoStage(anchor, chndl, - UCT_STAGE_MIME, UCT_SETBY_DEFAULT); + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); } } - FREE(cp3); if (chartrans_ok) { LYUCcharset *p_in = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_MIME); @@ -927,18 +932,58 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); } - if ((p_in->enc != UCT_ENC_CJK) && - (p_in->codepoints & UCT_CP_SUBSETOF_LAT1)) { + if (p_in->enc != UCT_ENC_CJK) { HTCJK = NOCJK; - } else if (chndl == current_char_set) { - HTPassEightBitRaw = TRUE; + if (!(p_in->codepoints & + UCT_CP_SUBSETOF_LAT1) && + chndl == current_char_set) { + HTPassEightBitRaw = TRUE; + } + } else if (p_out->enc == UCT_ENC_CJK) { + if (LYRawMode) { + if ((!strcmp(p_in->MIMEname, "euc-jp") || + !strcmp(p_in->MIMEname, "shift_jis")) && + (!strcmp(p_out->MIMEname, "euc-jp") || + !strcmp(p_out->MIMEname, "shift_jis"))) { + HTCJK = JAPANESE; + } else if (!strcmp(p_in->MIMEname, "euc-cn") && + !strcmp(p_out->MIMEname, "euc-cn")) { + HTCJK = CHINESE; + } else if (!strcmp(p_in->MIMEname, "big-5") && + !strcmp(p_out->MIMEname, "big-5")) { + HTCJK = TAIPEI; + } else if (!strcmp(p_in->MIMEname, "euc-kr") && + !strcmp(p_out->MIMEname, "euc-kr")) { + HTCJK = KOREAN; + } else { + HTCJK = NOCJK; + } + } else { + HTCJK = NOCJK; + } } /* - ** Fall through to old behavior. + ** Check for an iso-8859-# we don't know. - FM */ - } else -#endif /* EXP_CHARTRANS */ - if (!strncmp(cp2, "us-ascii", 8) || + } else if (!strncmp(cp4, "iso-8859-", 9) && + isdigit((unsigned char)cp4[9]) && + !strncmp(LYchar_set_names[current_char_set], + "Other ISO Latin", 15)) { + /* + ** Hope it's a match, for now. - FM + */ + *cp1 = '\0'; + format = HTAtom_for(cp); + cp1 = &cp4[10]; + while (*cp1 && + isdigit((unsigned char)(*cp1))) + cp1++; + *cp1 = '\0'; + StrAllocCopy(anchor->charset, cp4); + HTPassEightBitRaw = TRUE; + HTAlert(anchor->charset); +#ifdef NOT_USED /* pre chartrans */ + } else if (!strncmp(cp2, "us-ascii", 8) || !strncmp(cp2, "iso-8859-1", 10)) { *cp1 = '\0'; format = HTAtom_for(cp); @@ -1022,7 +1067,9 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( *cp1 = '\0'; format = HTAtom_for(cp); StrAllocCopy(anchor->charset, "iso-2022-cn"); +#endif /* NOT_USED */ } + FREE(cp3); } else if (cp1 != NULL) { /* ** No charset parameter is present. @@ -1034,15 +1081,14 @@ PUBLIC HTFormat HTCharsetFormat ARGS3( } FREE(cp); -#ifdef EXP_CHARTRANS if (!chartrans_ok && !anchor->charset && default_LYhndl >= 0) { HTAnchor_setUCInfoStage(anchor, default_LYhndl, - UCT_STAGE_MIME, UCT_SETBY_DEFAULT); + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); } HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME, -1); -#endif return format; } @@ -1628,6 +1674,7 @@ PUBLIC int HTLoadFile ARGS4( HTList_addObject(methods, put); } } + /* ** Trim vmsname at semicolon if a version number was ** included, so it doesn't interfere with the check @@ -1638,7 +1685,34 @@ PUBLIC int HTLoadFile ARGS4( /* ** Fake a Content-Encoding for compressed files. - FM */ - if ((len = strlen(vmsname)) > 2) { + if (!IsUnityEnc(myEncoding)) { + /* + * We already know from the call to HTFileFormat above + * that this is a compressed file, no need to look at + * the filename again. - kw + */ +#ifdef USE_ZLIB + if (strcmp(format_out->name, "www/download") != 0 && + (!strcmp(HTAtom_name(myEncoding), "gzip") || + !strcmp(HTAtom_name(myEncoding), "x-gzip"))) { + fclose(fp); + if (semicolon != NULL) + *semicolon = ';'; + gzfp = gzopen(vmsname, "rb"); + + if (TRACE) + fprintf(stderr, + "HTLoadFile: gzopen of `%s' gives %p\n", + vmsname, (void*)gzfp); + use_gzread = YES; + } else +#endif /* USE_ZLIB */ + { + StrAllocCopy(anchor->content_type, format->name); + StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding)); + format = HTAtom_for("www/compressed"); + } + } else if ((len = strlen(vmsname)) > 2) { if ((vmsname[len - 1] == 'Z') && (vmsname[len - 2] == '.' || vmsname[len - 2] == '-' || @@ -1670,12 +1744,14 @@ PUBLIC int HTLoadFile ARGS4( #ifdef USE_ZLIB if (strcmp(format_out->name, "www/download") != 0) { fclose(fp); - gzfp = gzopen(localname, "rb"); + if (semicolon != NULL) + *semicolon = ';'; + gzfp = gzopen(vmsname, "rb"); if (TRACE) fprintf(stderr, "HTLoadFile: gzopen of `%s' gives %p\n", - localname, (void*)gzfp); + vmsname, (void*)gzfp); use_gzread = YES; } #else /* USE_ZLIB */ @@ -1990,15 +2066,14 @@ PUBLIC int HTLoadFile ARGS4( } } FREE(pathname); - -#ifdef EXP_CHARTRANS + if (UCLYhndl_HTFile_for_unspec >= 0) { HTAnchor_setUCInfoStage(anchor, UCLYhndl_HTFile_for_unspec, UCT_STAGE_PARSER, UCT_SETBY_DEFAULT); } -#endif + target = HTML_new(anchor, format_out, sink); targetClass = *target->isa; /* Copy routine entry points */ diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c index 67795a22..ecd61ac2 100644 --- a/WWW/Library/Implementation/HTMIME.c +++ b/WWW/Library/Implementation/HTMIME.c @@ -15,11 +15,9 @@ #include "HTMIME.h" /* Implemented here */ #include "HTAlert.h" #include "HTCJK.h" -#ifdef EXP_CHARTRANS #include "UCMap.h" #include "UCDefs.h" #include "UCAux.h" -#endif #include "LYLeaks.h" @@ -27,6 +25,7 @@ extern int current_char_set; extern CONST char *LYchar_set_names[]; +extern BOOLEAN LYRawMode; extern BOOL HTPassEightBitRaw; extern HTCJKlang HTCJK; @@ -370,28 +369,26 @@ PRIVATE void HTMIME_put_character ARGS2( cp2 += 7; while (*cp2 == ' ' || *cp2 == '=' || *cp2 == '\"') cp2++; -#ifdef EXP_CHARTRANS StrAllocCopy(cp3, cp2); /* copy to mutilate more */ - for (cp4=cp3; (*cp4 != '\0' && *cp4 != '\"' && - *cp4 != ';' && *cp4 != ':' && - !WHITE(*cp4)); cp4++) - /* nothing */ ; + for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '\"' && + *cp4 != ';' && *cp4 != ':' && + !WHITE(*cp4)); cp4++) + ; /* do nothing */ *cp4 = '\0'; cp4 = cp3; chndl = UCGetLYhndl_byMIME(cp3); if (chndl < 0) { - if (0==strcmp(cp4, "cn-big5")) { + if (!strcmp(cp4, "cn-big5")) { cp4 += 3; chndl = UCGetLYhndl_byMIME(cp4); - } - else if (0==strncmp(cp4, "cn-gb", 5)) { + } else if (!strncmp(cp4, "cn-gb", 5)) { StrAllocCopy(cp3, "gb2312"); cp4 = cp3; chndl = UCGetLYhndl_byMIME(cp4); } } - if (UCCanTranslateFromTo(chndl, current_char_set)) - { + if (UCCanTranslateFromTo(chndl, + current_char_set)) { chartrans_ok = YES; *cp1 = '\0'; me->format = HTAtom_for(cp); @@ -403,8 +400,7 @@ PRIVATE void HTMIME_put_character ARGS2( recognize it */ chndl = UCLYhndl_for_unrec; if (UCCanTranslateFromTo(chndl, - current_char_set)) - { + current_char_set)) { chartrans_ok = YES; *cp1 = '\0'; me->format = HTAtom_for(cp); @@ -416,42 +412,97 @@ PRIVATE void HTMIME_put_character ARGS2( if (chartrans_ok) { LYUCcharset * p_in = HTAnchor_getUCInfoStage(me->anchor, - UCT_STAGE_MIME); + UCT_STAGE_MIME); LYUCcharset * p_out = HTAnchor_setUCInfoStage(me->anchor, current_char_set, - UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); - if (!p_out) /* try again */ + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + if (!p_out) + /* + ** Try again. + */ p_out = HTAnchor_getUCInfoStage(me->anchor, - UCT_STAGE_HTEXT); + UCT_STAGE_HTEXT); - if (0==strcmp(p_in->MIMEname,"x-transparent")) - { + if (!strcmp(p_in->MIMEname, + "x-transparent")) { HTPassEightBitRaw = TRUE; HTAnchor_setUCInfoStage(me->anchor, HTAnchor_getUCLYhndl(me->anchor, UCT_STAGE_HTEXT), UCT_STAGE_MIME, UCT_SETBY_DEFAULT); } - if (0==strcmp(p_out->MIMEname,"x-transparent")) - { + if (!strcmp(p_out->MIMEname, + "x-transparent")) { HTPassEightBitRaw = TRUE; HTAnchor_setUCInfoStage(me->anchor, - HTAnchor_getUCLYhndl(me->anchor, - UCT_STAGE_MIME), - UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); + HTAnchor_getUCLYhndl(me->anchor, + UCT_STAGE_MIME), + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); } - if ((p_in->enc != UCT_ENC_CJK) && - (p_in->codepoints & - UCT_CP_SUBSETOF_LAT1)) { + if (p_in->enc != UCT_ENC_CJK) { HTCJK = NOCJK; - } else if (chndl == current_char_set) { - HTPassEightBitRaw = TRUE; + if (!(p_in->codepoints & + UCT_CP_SUBSETOF_LAT1) && + chndl == current_char_set) { + HTPassEightBitRaw = TRUE; + } + } else if (p_out->enc == UCT_ENC_CJK) { + if (LYRawMode) { + if ((!strcmp(p_in->MIMEname, + "euc-jp") || + !strcmp(p_in->MIMEname, + "shift_jis")) && + (!strcmp(p_out->MIMEname, + "euc-jp") || + !strcmp(p_out->MIMEname, + "shift_jis"))) { + HTCJK = JAPANESE; + } else if (!strcmp(p_in->MIMEname, + "euc-cn") && + !strcmp(p_out->MIMEname, + "euc-cn")) { + HTCJK = CHINESE; + } else if (!strcmp(p_in->MIMEname, + "big-5") && + !strcmp(p_out->MIMEname, + "big-5")) { + HTCJK = TAIPEI; + } else if (!strcmp(p_in->MIMEname, + "euc-kr") && + !strcmp(p_out->MIMEname, + "euc-kr")) { + HTCJK = KOREAN; + } else { + HTCJK = NOCJK; + } + } else { + HTCJK = NOCJK; + } } - } else /* Fall through to old behavior */ -#endif /* EXP_CHARTRANS */ - if (!strncmp(cp2, "us-ascii", 8) || + } else if + (!strncmp(cp4, "iso-8859-", 9) && + isdigit((unsigned char)cp4[9]) && + !strncmp(LYchar_set_names[current_char_set], + "Other ISO Latin", 15)) { + /* + ** Hope it's a match, for now. - FM + */ + *cp1 = '\0'; + me->format = HTAtom_for(cp); + cp1 = &cp4[10]; + while (*cp1 && + isdigit((unsigned char)(*cp1))) + cp1++; + *cp1 = '\0'; + StrAllocCopy(me->anchor->charset, cp4); + HTPassEightBitRaw = TRUE; + HTAlert(me->anchor->charset); +#ifdef NOT_USED /* pre-chartrans */ + } else if (!strncmp(cp2, "us-ascii", 8) || !strncmp(cp2, "iso-8859-1", 10)) { *cp1 = '\0'; me->format = HTAtom_for(cp); @@ -562,7 +613,9 @@ PRIVATE void HTMIME_put_character ARGS2( me->format = HTAtom_for(cp); StrAllocCopy(me->anchor->charset, "iso-2022-cn"); +#endif /* NOT_USED */ } + FREE(cp3); } else { /* ** No charset parameter is present. @@ -1326,6 +1379,16 @@ PRIVATE void HTMIME_put_character ARGS2( "HTMIME: Was CONTENT_L, found E, checking for 'ngth:'\n"); break; + case 'o': + case 'O': + me->check_pointer = "cation:"; + me->if_ok = miCONTENT_LOCATION; + me->state = miCHECK; + if (TRACE) + fprintf(stderr, + "HTMIME: Was CONTENT_L, found O, checking for 'cation:'\n"); + break; + default: if (TRACE) fprintf (stderr, @@ -2207,6 +2270,8 @@ PUBLIC HTStream* HTNetMIME ARGS3( ** ** Written by S. Ichikawa, ** partially inspired by encdec.c of <jh@efd.lth.se>. +** +** Generalized HTmmdecode for chartrans - K. Weide 1997-03-06 */ #define BUFLEN 1024 #ifdef ESC @@ -2292,8 +2357,6 @@ PUBLIC void HTmmdec_quote ARGS2( strcpy(t, buf); } -/* Generalized HTmmdecode for chartrans - kweide 1997-03-06 */ - PUBLIC void HTmmdecode ARGS2( char *, trg, char *, str) @@ -2304,8 +2367,10 @@ PUBLIC void HTmmdecode ARGS2( buf[0] = '\0'; -/* encoded-words look like =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= */ - + /* + ** Encoded-words look like + ** =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + */ for (s = str, u = buf; *s; ) { base64 = quote = 0; if (*s == '=' && s[1] == '?' && @@ -2313,7 +2378,8 @@ PUBLIC void HTmmdecode ARGS2( { /* must be beginning of word */ qm2 = strchr(s+2, '?'); /* 2nd question mark */ if (qm2 && - (qm2[1] == 'B' || qm2[1] == 'b' || qm2[1] == 'Q' || qm2[1] == 'q') && + (qm2[1] == 'B' || qm2[1] == 'b' || qm2[1] == 'Q' || + qm2[1] == 'q') && qm2[2] == '?') { /* 3rd question mark */ char * qm4 = strchr(qm2 + 3, '?'); /* 4th question mark */ if (qm4 && qm4 - s < 74 && /* RFC 2047 length restriction */ @@ -2327,15 +2393,13 @@ PUBLIC void HTmmdecode ARGS2( } if (!invalid) { int LYhndl; + *qm2 = '\0'; -#ifdef EXP_CHARTRANS for (p = s+2; *p; p++) *p = TOLOWER(*p); invalid = ((LYhndl = UCGetLYhndl_byMIME(s+2)) < 0 || - !UCCanTranslateFromTo(LYhndl, current_char_set)); -#else - invalid = (0!=strncasecomp(s+2, "ISO-2022-JP", 11)); -#endif + !UCCanTranslateFromTo(LYhndl, + current_char_set)); *qm2 = '?'; } if (!invalid) { diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c index a519fb82..cbeb35f9 100644 --- a/WWW/Library/Implementation/HTMLDTD.c +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -1430,7 +1430,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "BLOCKQUOTE", bq_attr, HTML_BQ_ATTRIBUTES, SGML_MIXED }, */ #define T_BLOCKQUOTE 0x0200, 0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 /* { "BODY" , body_attr, HTML_BODY_ATTRIBUTES, SGML_MIXED }, */ -#define T_BODY 0x20000,0x2FB8F,0x2FFFF,0x30000,0x30000,0xDFFFF,0x00003 +#define T_BODY 0x20000,0x2FB8F,0x2FFFF,0x30000,0x30000,0xDFF7F,0x00003 /* { "BODYTEXT", bodytext_attr,HTML_BODYTEXT_ATTRIBUTES, SGML_MIXED }, */ #define T_BODYTEXT 0x20000,0x0FB8F,0xAFFFF,0x30200,0xB7FAF,0x8F17F,0x00003 /* { "BQ" , bq_attr, HTML_BQ_ATTRIBUTES, SGML_MIXED }, */ @@ -1501,7 +1501,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "H6" , h_attr, HTML_H_ATTRIBUTES, SGML_MIXED }, */ #define T_H6 0x0100, 0x0B04F,0x0B05F,0x36680,0x37FAF,0x80317,0x00000 /* { "HEAD" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ -#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FFFF,0x00006 +#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FF7F,0x00006 /* { "HR" , hr_attr, HTML_HR_ATTRIBUTES, SGML_EMPTY }, */ #define T_HR 0x4000, 0x00000,0x00000,0x3FE80,0x3FFBF,0x87F37,0x00001 /* { "HTML" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ @@ -1574,7 +1574,7 @@ static attr ulist_attr[] = { /* UL attributes */ /* { "SCRIPT" , script_attr, HTML_SCRIPT_ATTRIBUTES, SGML_LITTERAL }, */ #define T_SCRIPT 0x2000, 0x00000,0x00000,0x77F9F,0x77FFF,0x87F5F,0x00000 /* { "SELECT" , select_attr, HTML_SELECT_ATTRIBUTES, SGML_MIXED }, */ -#define T_SELECT 0x0040, 0x08000,0x08000,0x03FAF,0x13FBF,0x80F5F,0x00000 +#define T_SELECT 0x0040, 0x08000,0x08000,0x03FAF,0x13FBF,0x80F5F,0x00008 #define T_SHY 0x1000, 0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 /* { "SMALL" , gen_attr, HTML_GEN_ATTRIBUTES, SGML_MIXED }, */ #define T_SMALL 0x0001, 0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00004 diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c index bfd64e5a..adc9568b 100644 --- a/WWW/Library/Implementation/HTNews.c +++ b/WWW/Library/Implementation/HTNews.c @@ -54,6 +54,10 @@ struct _HTStructured { CONST HTStructuredClass * isa; /* ... */ }; +struct _HTStream +{ + HTStreamClass * isa; +}; #define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */ #define GROUP_NAME_LENGTH 256 /* Maximum length of group name */ @@ -62,6 +66,7 @@ extern BOOLEAN LYListNewsNumbers; extern BOOLEAN LYListNewsDates; extern HTCJKlang HTCJK; extern int interrupted_in_htgetcharacter; +extern BOOL keep_mime_headers; /* Include mime headers and force raw text */ extern BOOL using_proxy; /* Are we using an NNTP proxy? */ /* @@ -76,16 +81,24 @@ PRIVATE char response_text[LINE_LENGTH+1]; /* Last response */ /* PRIVATE HText * HT; */ /* the new hypertext */ PRIVATE HTStructured * target; /* The output sink */ PRIVATE HTStructuredClass targetClass; /* Copy of fn addresses */ +PRIVATE HTStream * rawtarget = NULL; /* The output sink for rawtext */ +PRIVATE HTStreamClass rawtargetClass; /* Copy of fn addresses */ PRIVATE HTParentAnchor *node_anchor; /* Its anchor */ PRIVATE int diagnostic; /* level: 0=none 2=source */ +PRIVATE BOOL rawtext = NO; /* Flag: HEAD or -mime_headers */ PRIVATE HTList *NNTP_AuthInfo = NULL; /* AUTHINFO database */ #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) +#define RAW_PUTS(s) (*rawtargetClass.put_string)(rawtarget, s) #define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) #define END(e) (*targetClass.end_element)(target, e, 0) #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ (*targetClass.end_element)(target, e, 0) +#define FREE_TARGET if (rawtext) (*rawtargetClass._free)(rawtarget); \ + else (*targetClass._free)(target) +#define ABORT_TARGET if (rawtext) (*rawtargetClass._abort)(rawtarget, NULL); \ + else (*targetClass._abort)(target, NULL) typedef struct _NNTPAuth { char * host; @@ -958,7 +971,7 @@ PRIVATE int read_article NOARGS ** The header fields are either ignored, ** or formatted and put into the text. */ - if (!diagnostic) { + if (!diagnostic && !rawtext) { while (!done) { char ch = *p++ = NEXT_CHAR; if (ch == (char)EOF) { @@ -1217,20 +1230,26 @@ PRIVATE int read_article NOARGS FREE(href); } - if (diagnostic) { + if (rawtext) { + /* + * No tags - kw + */ + ; + } else if (diagnostic) { /* ** Read in the HEAD and BODY of the Article ** as XMP formatted text. - FM */ START(HTML_XMP); + PUTC('\n'); } else { /* ** Read in the BODY of the Article ** as PRE formatted text. - FM */ START(HTML_PRE); + PUTC('\n'); } - PUTC('\n'); p = line; while (!done) { @@ -1258,10 +1277,15 @@ PRIVATE int read_article NOARGS done = YES; break; } else { /* Line starts with dot */ - PUTS(&line[1]); /* Ignore first dot */ + if (rawtext) + RAW_PUTS(&line[1]); + else + PUTS(&line[1]); /* Ignore first dot */ } } else { - if (diagnostic || !scan_for_buried_news_references) { + if (rawtext) { + RAW_PUTS(line); + } else if (diagnostic || !scan_for_buried_news_references) { /* ** All lines are passed as unmodified source. - FM */ @@ -1361,7 +1385,10 @@ PRIVATE int read_article NOARGS p = line; /* Restart at beginning */ } /* if end of line */ } /* Loop over characters */ - + + if (rawtext) + return(HT_LOADED); + if (diagnostic) END(HTML_XMP); else @@ -1955,6 +1982,7 @@ PUBLIC int HTLoadNews ARGS4( BOOL reply_wanted; /* Flag: followup post was asked for */ BOOL spost_wanted; /* Flag: new SSL post to group was asked for */ BOOL sreply_wanted; /* Flag: followup SSL post was asked for */ + BOOL head_wanted = NO; /* Flag: want HEAD of single article */ int first, last; /* First and last articles asked for */ char *cp = 0; char *ListArg = NULL; @@ -1965,6 +1993,7 @@ PUBLIC int HTLoadNews ARGS4( diagnostic = (format_out == WWW_SOURCE || /* set global flag */ format_out == HTAtom_for("www/download") || format_out == HTAtom_for("www/dump")); + rawtext = NO; if (TRACE) fprintf(stderr, "HTNews: Looking for %s\n", arg); @@ -2260,6 +2289,34 @@ PUBLIC int HTLoadNews ARGS4( return NO; /* Ignore if no name */ } + if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted || + (group_wanted && last != -1) || list_wanted)) { + head_wanted = anAnchor->isHEAD; + if (head_wanted && !strncmp(command, "ARTICLE_", 8)) { + /* overwrite "ARTICLE" - hack... */ + strcpy(command, "HEAD "); + for (cp = command + 5; ; cp++) + if ((*cp = *(cp + 3)) == '\0') + break; + } + rawtext = (head_wanted || keep_mime_headers); + } + if (rawtext) { + node_anchor = anAnchor; + rawtarget = HTStreamStack(WWW_PLAINTEXT, + format_out, + stream, anAnchor); + if (!rawtarget) { + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + HTAlert("No target for raw text!"); + return(HT_NOT_LOADED); + } /* Copy routine entry points */ + rawtargetClass = *rawtarget->isa; + } else /* ** Make a hypertext object with an anchor list. */ @@ -2298,8 +2355,9 @@ PUBLIC int HTLoadNews ARGS4( "HTNews: Interrupted on connect; recovering cleanly.\n"); _HTProgress("Connection interrupted."); if (!(post_wanted || reply_wanted || - spost_wanted || sreply_wanted)) - (*targetClass._abort)(target, NULL); + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } FREE(NewsHost); FREE(NewsHREF); FREE(ProxyHost); @@ -2360,8 +2418,9 @@ PUBLIC int HTLoadNews ARGS4( if (status == HT_INTERRUPTED) { _HTProgress("Connection interrupted."); if (!(post_wanted || reply_wanted || - spost_wanted || sreply_wanted)) - (*targetClass._abort)(target, NULL); + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } FREE(NewsHost); FREE(NewsHREF); FREE(ProxyHost); @@ -2609,7 +2668,9 @@ Send_NNTP_command: ** the number (first) as the command and go back ** to send it and check the response. - FM */ - sprintf(command, "ARTICLE %d%c%c", first, CR, LF); + sprintf(command, "%s %d%c%c", + head_wanted ? "HEAD" : "ARTICLE", + first, CR, LF); group_wanted = FALSE; retries = 2; goto Send_NNTP_command; @@ -2630,9 +2691,9 @@ Send_NNTP_command: if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted)) { if (status == HT_NOT_LOADED) { - (*targetClass._abort)(target, NULL); + ABORT_TARGET; } else { - (*targetClass._free)(target); + FREE_TARGET; } } FREE(NewsHREF); @@ -2660,8 +2721,9 @@ Send_NNTP_command: NULL,NULL,NULL, arg);No -- message earlier wil have covered it */ if (!(post_wanted || reply_wanted || - spost_wanted || sreply_wanted)) - (*targetClass._abort)(target, NULL); + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } FREE(NewsHREF); if (ProxyHREF) { StrAllocCopy(NewsHost, ProxyHost); diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index 3c5fafc5..6f558ef5 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -98,6 +98,7 @@ PRIVATE void scan ARGS2( if (*p =='#') { parts->anchor = (p + 1); *p = '\0'; /* terminate the rest */ + break; /* leave things after first # alone - kw */ } } @@ -137,8 +138,12 @@ PRIVATE void scan ARGS2( * or it's an nntp or snews URL, or news URL with a host. * Restore the '#' in the address. */ - *(parts->anchor - 1) = '#'; - parts->anchor = NULL; + /* but only if we have found a path component of which this will + * become part. - kw */ + if (parts->relative || parts->absolute) { + *(parts->anchor - 1) = '#'; + parts->anchor = NULL; + } } } @@ -693,12 +698,13 @@ PUBLIC char * HTUnEscape ARGS1( return str; while (*p != '\0') { - if (*p == HEX_ESCAPE) { + if (*p == HEX_ESCAPE && + p[1] && p[2] && /* tests shouldn't be needed, but.. */ + isxdigit((unsigned char)p[1]) && + isxdigit((unsigned char)p[2])) { p++; - if (*p) - *q = from_hex(*p++) * 16; - if (*p) - *q = FROMASCII(*q + from_hex(*p++)); + *q = from_hex(*p++) * 16; + *q = FROMASCII(*q + from_hex(*p++)); q++; } else { *q++ = *p++; diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c index c2cadecd..6e64714f 100644 --- a/WWW/Library/Implementation/SGML.c +++ b/WWW/Library/Implementation/SGML.c @@ -792,6 +792,16 @@ PRIVATE void start_element ARGS1( new_tag->name); } } + if (context->element_stack && !valid && + (context->element_stack->tag->flags & Tgf_strict) && + !(valid = element_valid_within(new_tag, context->element_stack->tag, + direct_container))) { + if (TRACE) + fprintf(stderr, "SGML: Still open %s \t<- ignoring start <%s>\n", + context->element_stack->tag->name, + new_tag->name); + return; + } if (context->element_stack && !extra_action_taken && canclose_check == close_NO && !valid && (new_tag->flags & Tgf_mafse)) { diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h index d8f90c19..645bf266 100644 --- a/WWW/Library/Implementation/SGML.h +++ b/WWW/Library/Implementation/SGML.h @@ -90,8 +90,10 @@ typedef enum _TagClass { typedef enum _TagFlags { Tgf_endO = 0x00001, /* end tag can be Omitted */ Tgf_startO = 0x00002, /* start tag can be Omitted */ - Tgf_mafse = 0x00004 /* Make Attribute-Free Start-tag End instead + Tgf_mafse = 0x00004, /* Make Attribute-Free Start-tag End instead (if found invalid) */ + Tgf_strict = 0x00008 /* Ignore contained invalid elements, + don't pass them on */ } TagFlags; /* A tag structure describes an SGML element. |