diff options
Diffstat (limited to 'WWW/Library/Implementation/HTAccess.c')
-rw-r--r-- | WWW/Library/Implementation/HTAccess.c | 1447 |
1 files changed, 1447 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/HTAccess.c b/WWW/Library/Implementation/HTAccess.c new file mode 100644 index 00000000..2b287e9d --- /dev/null +++ b/WWW/Library/Implementation/HTAccess.c @@ -0,0 +1,1447 @@ +/* + * $LynxId: HTAccess.c,v 1.76 2012/02/04 00:15:53 tom Exp $ + * + * Access Manager HTAccess.c + * ============== + * + * Authors + * TBL Tim Berners-Lee timbl@info.cern.ch + * JFG Jean-Francois Groff jfg@dxcern.cern.ch + * DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu> + * FM Foteos Macrides macrides@sci.wfeb.edu + * PDM Danny Mayer mayer@ljo.dec.com + * + * History + * 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL + * 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG + * 6 Oct 92 Moved HTClientHost and logfile into here. TBL + * 17 Dec 92 Tn3270 added, bug fix. DD + * 4 Feb 93 Access registration, Search escapes bad chars TBL + * PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED + * 28 May 93 WAIS gateway explicit if no WAIS library linked in. + * 31 May 94 Added DIRECT_WAIS support for VMS. FM + * 27 Jan 95 Fixed proxy support to use NNTPSERVER for checking + * whether or not to use the proxy server. PDM + * 27 Jan 95 Ensured that proxy service will be overridden for files + * on the local host (because HTLoadFile() doesn't try ftp + * for those) and will substitute ftp for remote files. FM + * 28 Jan 95 Tweaked PDM's proxy override mods to handle port info + * for news and wais URL's. FM + * + * Bugs + * This module assumes that that the graphic object is hypertext, as it + * needs to select it when it has been loaded. A superclass needs to be + * defined which accepts select and select_anchor. + */ + +#ifdef VMS +#define DIRECT_WAIS +#endif /* VMS */ + +#include <HTUtils.h> +#include <HTTP.h> +#include <HTAlert.h> +/* + * Implements: + */ +#include <HTAccess.h> + +/* + * Uses: + */ +#include <HTParse.h> +#include <HTML.h> /* SCW */ + +#ifndef NO_RULES +#include <HTRules.h> +#endif + +#include <HTList.h> +#include <HText.h> /* See bugs above */ +#include <HTCJK.h> +#include <UCMap.h> +#include <GridText.h> + +#include <LYGlobalDefs.h> +#include <LYexit.h> +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYLeaks.h> + +/* + * These flags may be set to modify the operation of this module + */ +char *HTClientHost = NULL; /* Name of remote login host if any */ +FILE *HTlogfile = NULL; /* File to which to output one-liners */ +BOOL HTSecure = NO; /* Disable access for telnet users? */ +BOOL HTPermitRedir = NO; /* Always allow redirection in getfile()? */ + +BOOL using_proxy = NO; /* are we using a proxy gateway? */ + +/* + * To generate other things, play with these: + */ +HTFormat HTOutputFormat = NULL; +HTStream *HTOutputStream = NULL; /* For non-interactive, set this */ + +static HTList *protocols = NULL; /* List of registered protocol descriptors */ + +char *use_this_url_instead = NULL; + +static int pushed_assume_LYhndl = -1; /* see LYUC* functions below - kw */ +static char *pushed_assume_MIMEname = NULL; + +#ifdef LY_FIND_LEAKS +static void free_protocols(void) +{ + HTList_delete(protocols); + protocols = NULL; + FREE(pushed_assume_MIMEname); /* shouldn't happen, just in case - kw */ +} +#endif /* LY_FIND_LEAKS */ + +/* Register a Protocol. HTRegisterProtocol() + * -------------------- + */ +BOOL HTRegisterProtocol(HTProtocol * protocol) +{ + if (!protocols) { + protocols = HTList_new(); +#ifdef LY_FIND_LEAKS + atexit(free_protocols); +#endif + } + HTList_addObject(protocols, protocol); + return YES; +} + +/* Register all known protocols. HTAccessInit() + * ----------------------------- + * + * Add to or subtract from this list if you add or remove protocol + * modules. This routine is called the first time the protocol list + * is needed, unless any protocols are already registered, in which + * case it is not called. Therefore the application can override + * this list. + * + * Compiling with NO_INIT prevents all known protocols from being + * forced in at link time. + */ +#ifndef NO_INIT +#ifdef GLOBALREF_IS_MACRO +extern GLOBALREF (HTProtocol, HTTP); +extern GLOBALREF (HTProtocol, HTTPS); +extern GLOBALREF (HTProtocol, HTFile); +extern GLOBALREF (HTProtocol, HTTelnet); +extern GLOBALREF (HTProtocol, HTTn3270); +extern GLOBALREF (HTProtocol, HTRlogin); + +#ifndef DECNET +#ifndef DISABLE_FTP +extern GLOBALREF (HTProtocol, HTFTP); +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS +extern GLOBALREF (HTProtocol, HTNews); +extern GLOBALREF (HTProtocol, HTNNTP); +extern GLOBALREF (HTProtocol, HTNewsPost); +extern GLOBALREF (HTProtocol, HTNewsReply); +extern GLOBALREF (HTProtocol, HTSNews); +extern GLOBALREF (HTProtocol, HTSNewsPost); +extern GLOBALREF (HTProtocol, HTSNewsReply); +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER +extern GLOBALREF (HTProtocol, HTGopher); +extern GLOBALREF (HTProtocol, HTCSO); +#endif /* not DISABLE_GOPHER */ +#ifndef DISABLE_FINGER +extern GLOBALREF (HTProtocol, HTFinger); +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS +extern GLOBALREF (HTProtocol, HTWAIS); +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ +#else +GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin; + +#ifndef DECNET +#ifndef DISABLE_FTP +GLOBALREF HTProtocol HTFTP; +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS +GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply; +GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply; +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER +GLOBALREF HTProtocol HTGopher, HTCSO; +#endif /* not DISABLE_GOPHER */ +#ifndef DISABLE_FINGER +GLOBALREF HTProtocol HTFinger; +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS +GLOBALREF HTProtocol HTWAIS; +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ +#endif /* GLOBALREF_IS_MACRO */ + +static void HTAccessInit(void) /* Call me once */ +{ + HTRegisterProtocol(&HTTP); + HTRegisterProtocol(&HTTPS); + HTRegisterProtocol(&HTFile); + HTRegisterProtocol(&HTTelnet); + HTRegisterProtocol(&HTTn3270); + HTRegisterProtocol(&HTRlogin); +#ifndef DECNET +#ifndef DISABLE_FTP + HTRegisterProtocol(&HTFTP); +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS + HTRegisterProtocol(&HTNews); + HTRegisterProtocol(&HTNNTP); + HTRegisterProtocol(&HTNewsPost); + HTRegisterProtocol(&HTNewsReply); + HTRegisterProtocol(&HTSNews); + HTRegisterProtocol(&HTSNewsPost); + HTRegisterProtocol(&HTSNewsReply); +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER + HTRegisterProtocol(&HTGopher); + HTRegisterProtocol(&HTCSO); +#endif /* not DISABLE_GOPHER */ +#ifndef DISABLE_FINGER + HTRegisterProtocol(&HTFinger); +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS + HTRegisterProtocol(&HTWAIS); +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ + LYRegisterLynxProtocols(); +} +#endif /* !NO_INIT */ + +/* Check for proxy override. override_proxy() + * ------------------------- + * + * Check the no_proxy environment variable to get the list + * of hosts for which proxy server is not consulted. + * + * no_proxy is a comma- or space-separated list of machine + * or domain names, with optional :port part. If no :port + * part is present, it applies to all ports on that domain. + * + * Example: + * no_proxy="cern.ch,some.domain:8001" + * + * Use "*" to override all proxy service: + * no_proxy="*" + */ +BOOL override_proxy(const char *addr) +{ + const char *no_proxy = getenv("no_proxy"); + char *p = NULL; + char *at = NULL; + char *host = NULL; + char *Host = NULL; + char *acc_method = NULL; + int port = 0; + int h_len = 0; + + /* + * Check for global override. + */ + if (no_proxy) { + if (!strcmp(no_proxy, "*")) + return YES; + } + + /* + * Never proxy file:// URLs if they are on the local host. HTLoadFile() + * will not attempt ftp for those if direct access fails. We'll check that + * first, in case no_proxy hasn't been defined. - FM + */ + if (!addr) + return NO; + if (!(host = HTParse(addr, "", PARSE_HOST))) + return NO; + if (!*host) { + FREE(host); + return NO; + } + Host = (((at = strchr(host, '@')) != NULL) ? (at + 1) : host); + + if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) { + if (!strcmp("file", acc_method) && + (LYSameHostname(Host, "localhost") || + LYSameHostname(Host, HTHostName()))) { + FREE(host); + FREE(acc_method); + return YES; + } + FREE(acc_method); + } + + if (!no_proxy) { + FREE(host); + return NO; + } + + if (NULL != (p = HTParsePort(Host, &port))) { /* Port specified */ + *p = 0; /* Chop off port */ + } else { /* Use default port */ + acc_method = HTParse(addr, "", PARSE_ACCESS); + if (acc_method != NULL) { + /* *INDENT-OFF* */ + if (!strcmp(acc_method, "http")) port = 80; + else if (!strcmp(acc_method, "https")) port = 443; + else if (!strcmp(acc_method, "ftp")) port = 21; +#ifndef DISABLE_GOPHER + else if (!strcmp(acc_method, "gopher")) port = 70; +#endif + else if (!strcmp(acc_method, "cso")) port = 105; +#ifndef DISABLE_NEWS + else if (!strcmp(acc_method, "news")) port = 119; + else if (!strcmp(acc_method, "nntp")) port = 119; + else if (!strcmp(acc_method, "newspost")) port = 119; + else if (!strcmp(acc_method, "newsreply")) port = 119; + else if (!strcmp(acc_method, "snews")) port = 563; + else if (!strcmp(acc_method, "snewspost")) port = 563; + else if (!strcmp(acc_method, "snewsreply")) port = 563; +#endif + else if (!strcmp(acc_method, "wais")) port = 210; +#ifndef DISABLE_FINGER + else if (!strcmp(acc_method, "finger")) port = 79; +#endif + else if (!strcmp(acc_method, "telnet")) port = 23; + else if (!strcmp(acc_method, "tn3270")) port = 23; + else if (!strcmp(acc_method, "rlogin")) port = 513; + /* *INDENT-ON* */ + + FREE(acc_method); + } + } + if (!port) + port = 80; /* Default */ + h_len = (int) strlen(Host); + + while (*no_proxy) { + const char *end; + const char *colon = NULL; + int templ_port = 0; + int t_len; + int brackets = 0; + + while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ',')) + no_proxy++; /* Skip whitespace and separators */ + + end = no_proxy; + while (*end && !WHITE(*end) && *end != ',') { /* Find separator */ + if (!brackets && (*end == ':')) + colon = end; /* Port number given */ + else if (*end == '[') + ++brackets; + else if (*end == ']') + --brackets; + end++; + } + + if (colon) { + /* unlike HTParsePort(), this may be followed by non-digits */ + templ_port = atoi(colon + 1); + t_len = (int) (colon - no_proxy); + } else { + t_len = (int) (end - no_proxy); + } + + if ((!templ_port || templ_port == port) && + (t_len > 0 && t_len <= h_len && + !strncasecomp(Host + h_len - t_len, no_proxy, t_len))) { + FREE(host); + return YES; + } +#ifdef CJK_EX /* ASATAKU PROXY HACK */ + if ((!templ_port || templ_port == port) && + (t_len > 0 && t_len <= h_len && + isdigit(UCH(*no_proxy)) && + !StrNCmp(host, no_proxy, t_len))) { + FREE(host); + return YES; + } +#endif /* ASATAKU PROXY HACK */ + + if (*end) + no_proxy = (end + 1); + else + break; + } + + FREE(host); + return NO; +} + +/* Find physical name and access protocol get_physical() + * -------------------------------------- + * + * On entry, + * addr must point to the fully qualified hypertext reference. + * anchor a parent anchor with whose address is addr + * + * On exit, + * returns HT_NO_ACCESS Error has occurred. + * HT_OK Success + */ +static int get_physical(const char *addr, + HTParentAnchor *anchor) +{ + int result; + char *acc_method = NULL; /* Name of access method */ + char *physical = NULL; + char *Server_addr = NULL; + BOOL override_flag = NO; + + CTRACE((tfp, "get_physical %s\n", addr)); + + /* + * Make sure the using_proxy variable is FALSE. + */ + using_proxy = NO; + +#ifndef NO_RULES + if ((physical = HTTranslate(addr)) == 0) { + if (redirecting_url) { + return HT_REDIRECTING; + } + return HT_FORBIDDEN; + } + if (anchor->isISMAPScript == TRUE) { + StrAllocCat(physical, "?0,0"); + CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); + } + if (!StrNCmp(physical, "Proxied=", 8)) { + HTAnchor_setPhysical(anchor, physical + 8); + using_proxy = YES; + } else if (!StrNCmp(physical, "NoProxy=", 8)) { + HTAnchor_setPhysical(anchor, physical + 8); + override_flag = YES; + } else { + HTAnchor_setPhysical(anchor, physical); + } + FREE(physical); /* free our copy */ +#else + if (anchor->isISMAPScript == TRUE) { + StrAllocCopy(physical, addr); + StrAllocCat(physical, "?0,0"); + CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); + HTAnchor_setPhysical(anchor, physical); + FREE(physical); /* free our copy */ + } else { + HTAnchor_setPhysical(anchor, addr); + } +#endif /* NO_RULES */ + + acc_method = HTParse(HTAnchor_physical(anchor), STR_FILE_URL, PARSE_ACCESS); + + /* + * Check whether gateway access has been set up for this. + * + * This function can be replaced by the rule system above. + * + * If the rule system has already determined that we should use a proxy, or + * that we shouldn't, ignore proxy-related settings, don't use no_proxy + * either. + */ +#define USE_GATEWAYS +#ifdef USE_GATEWAYS + + if (!override_flag && !using_proxy) { /* else ignore no_proxy env var */ + char *host = NULL; + int port; + + if (!strcasecomp(acc_method, "news")) { + /* + * News is different, so we need to check the name of the server, + * as well as the default port for selective exclusions. + */ + if ((host = HTParse(addr, "", PARSE_HOST))) { + if (HTParsePort(host, &port) == NULL) { + StrAllocCopy(Server_addr, "news://"); + StrAllocCat(Server_addr, host); + StrAllocCat(Server_addr, ":119/"); + } + FREE(host); + } else if (LYGetEnv("NNTPSERVER") != NULL) { + StrAllocCopy(Server_addr, "news://"); + StrAllocCat(Server_addr, LYGetEnv("NNTPSERVER")); + StrAllocCat(Server_addr, ":119/"); + } + } else if (!strcasecomp(acc_method, "wais")) { + /* + * Wais also needs checking of the default port for selective + * exclusions. + */ + if ((host = HTParse(addr, "", PARSE_HOST))) { + if (!(HTParsePort(host, &port))) { + StrAllocCopy(Server_addr, "wais://"); + StrAllocCat(Server_addr, host); + StrAllocCat(Server_addr, ":210/"); + } + FREE(host); + } else + StrAllocCopy(Server_addr, addr); + } else { + StrAllocCopy(Server_addr, addr); + } + override_flag = override_proxy(Server_addr); + } + + if (!override_flag && !using_proxy) { + char *gateway_parameter = NULL, *gateway, *proxy; + + /* + * Search for gateways. + */ + HTSprintf0(&gateway_parameter, "WWW_%s_GATEWAY", acc_method); + gateway = LYGetEnv(gateway_parameter); /* coerce for decstation */ + + /* + * Search for proxy servers. + */ + if (!strcmp(acc_method, "file")) + /* + * If we got to here, a file URL is for ftp on a remote host. - FM + */ + strcpy(gateway_parameter, "ftp_proxy"); + else + sprintf(gateway_parameter, "%s_proxy", acc_method); + proxy = LYGetEnv(gateway_parameter); + FREE(gateway_parameter); + + if (gateway) + CTRACE((tfp, "Gateway found: %s\n", gateway)); + if (proxy) + CTRACE((tfp, "proxy server found: %s\n", proxy)); + + /* + * Proxy servers have precedence over gateway servers. + */ + if (proxy) { + char *gatewayed = NULL; + + StrAllocCopy(gatewayed, proxy); + if (!StrNCmp(gatewayed, "http", 4)) { + char *cp = strrchr(gatewayed, '/'); + + /* Append a slash to the proxy specification if it doesn't + * end in one but otherwise looks normal (starts with "http", + * has no '/' other than ones before the hostname). - kw */ + if (cp && (cp - gatewayed) <= 7) + LYAddHtmlSep(&gatewayed); + } + /* + * Ensure that the proxy server uses ftp for file URLs. - FM + */ + if (!StrNCmp(addr, "file", 4)) { + StrAllocCat(gatewayed, "ftp"); + StrAllocCat(gatewayed, (addr + 4)); + } else + StrAllocCat(gatewayed, addr); + using_proxy = YES; + if (anchor->isISMAPScript == TRUE) + StrAllocCat(gatewayed, "?0,0"); + HTAnchor_setPhysical(anchor, gatewayed); + FREE(gatewayed); + FREE(acc_method); + + acc_method = HTParse(HTAnchor_physical(anchor), + STR_HTTP_URL, PARSE_ACCESS); + + } else if (gateway) { + char *path = HTParse(addr, "", + PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION); + + /* Chop leading / off to make host into part of path */ + char *gatewayed = HTParse(path + 1, gateway, PARSE_ALL); + + FREE(path); + HTAnchor_setPhysical(anchor, gatewayed); + FREE(gatewayed); + FREE(acc_method); + + acc_method = HTParse(HTAnchor_physical(anchor), + STR_HTTP_URL, PARSE_ACCESS); + } + } + FREE(Server_addr); +#endif /* use gateways */ + + /* + * Search registered protocols to find suitable one. + */ + result = HT_NO_ACCESS; + { + int i, n; + +#ifndef NO_INIT + if (!protocols) + HTAccessInit(); +#endif + n = HTList_count(protocols); + for (i = 0; i < n; i++) { + HTProtocol *p = (HTProtocol *) HTList_objectAt(protocols, i); + + if (!strcmp(p->name, acc_method)) { + HTAnchor_setProtocol(anchor, p); + FREE(acc_method); + result = HT_OK; + break; + } + } + } + + FREE(acc_method); + return result; +} + +/* + * Temporarily set the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec + * used for charset "assuming" to the values implied by a HTParentAnchor's + * UCStages, after saving the current values for later restoration. - kw @@@ + * These functions may not really belong here, but where else? I want the + * "pop" to occur as soon as possible after loading has finished. - kw @@@ + */ +void LYUCPushAssumed(HTParentAnchor *anchor) +{ + int anchor_LYhndl = -1; + LYUCcharset *anchor_UCI = NULL; + + if (anchor) { + anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); + if (anchor_LYhndl >= 0) + anchor_UCI = HTAnchor_getUCInfoStage(anchor, + UCT_STAGE_PARSER); + if (anchor_UCI && anchor_UCI->MIMEname) { + pushed_assume_MIMEname = UCAssume_MIMEcharset; + UCAssume_MIMEcharset = NULL; + if (HTCJK == JAPANESE) + StrAllocCopy(UCAssume_MIMEcharset, pushed_assume_MIMEname); + else + StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname); + pushed_assume_LYhndl = anchor_LYhndl; + /* some diagnostics */ + if (UCLYhndl_for_unspec != anchor_LYhndl) + CTRACE((tfp, + "LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n", + UCLYhndl_for_unspec, + anchor_LYhndl)); + UCLYhndl_for_unspec = anchor_LYhndl; + return; + } + } + pushed_assume_LYhndl = -1; + FREE(pushed_assume_MIMEname); +} + +/* + * Restore the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec used for + * charset "assuming" from the values saved by LYUCPushAssumed, if any. - kw + */ +int LYUCPopAssumed(void) +{ + if (pushed_assume_LYhndl >= 0) { + /* some diagnostics */ + if (UCLYhndl_for_unspec != pushed_assume_LYhndl) + CTRACE((tfp, + "LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n", + UCLYhndl_for_unspec, + pushed_assume_LYhndl)); + UCLYhndl_for_unspec = pushed_assume_LYhndl; + pushed_assume_LYhndl = -1; + FREE(UCAssume_MIMEcharset); + UCAssume_MIMEcharset = pushed_assume_MIMEname; + pushed_assume_MIMEname = NULL; + return UCLYhndl_for_unspec; + } + return -1; +} + +/* Load a document HTLoad() + * --------------- + * + * This is an internal routine, which has an address AND a matching + * anchor. (The public routines are called with one OR the other.) + * + * On entry, + * addr must point to the fully qualified hypertext reference. + * anchor a parent anchor with whose address is addr + * + * On exit, + * returns <0 Error has occurred. + * HT_LOADED Success + * HT_NO_DATA Success, but no document loaded. + * (telnet session started etc) + */ +static int HTLoad(const char *addr, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + HTProtocol *p; + int status = get_physical(addr, anchor); + + if (reloading) { + FREE(anchor->charset); + FREE(anchor->UCStages); + } + + if (status == HT_FORBIDDEN) { + /* prevent crash if telnet or similar was forbidden by rule. - kw */ + LYFixCursesOn("show alert:"); + status = HTLoadError(sink, 500, gettext("Access forbidden by rule")); + } else if (status == HT_REDIRECTING) { + ; /* fake redirection by rule, to redirecting_url */ + } else if (status >= 0) { + /* prevent crash if telnet or similar mapped or proxied by rule. - kw */ + LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor)); + p = (HTProtocol *) HTAnchor_protocol(anchor); + anchor->parent->underway = TRUE; /* Hack to deal with caching */ + status = p->load(HTAnchor_physical(anchor), + anchor, format_out, sink); + anchor->parent->underway = FALSE; + LYUCPopAssumed(); + } + return status; +} + +/* Get a save stream for a document HTSaveStream() + * -------------------------------- + */ +HTStream *HTSaveStream(HTParentAnchor *anchor) +{ + HTProtocol *p = (HTProtocol *) HTAnchor_protocol(anchor); + + if (!p) + return NULL; + + return p->saveStream(anchor); +} + +int redirection_attempts = 0; /* counter in HTLoadDocument */ + +/* Load a document - with logging etc HTLoadDocument() + * ---------------------------------- + * + * - Checks or documents already loaded + * - Logs the access + * - Allows stdin filter option + * - Trace output and error messages + * + * On Entry, + * anchor is the node_anchor for the document + * full_address The address of the document to be accessed. + * filter if YES, treat stdin as HTML + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +static BOOL HTLoadDocument(const char *full_address, /* may include #fragment */ + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + int status; + HText *text; + const char *address_to_load = full_address; + char *cp; + BOOL ForcingNoCache = LYforce_no_cache; + + CTRACE((tfp, "HTAccess: loading document %s\n", address_to_load)); + + /* + * Free use_this_url_instead and reset permanent_redirection if not done + * elsewhere. - FM + */ + FREE(use_this_url_instead); + permanent_redirection = FALSE; + + /* + * Make sure some yoyo doesn't send us 'round in circles with redirecting + * URLs that point back to themselves. We'll set the original Lynx limit + * of 10 redirections per requested URL from a user, because the HTTP/1.1 + * will no longer specify a restriction to 5, but will leave it up to the + * browser's discretion, in deference to Microsoft. - FM + */ + if (redirection_attempts > 10) { + redirection_attempts = 0; + HTAlert(TOO_MANY_REDIRECTIONS); + return NO; + } + + /* + * If this is marked as an internal link but we don't have the document + * loaded any more, and we haven't explicitly flagged that we want to + * reload with LYforce_no_cache, then something has disappeared from the + * cache when we expected it to be still there. The user probably doesn't + * expect a new network access. So if we have POST data and safe is not + * set in the anchor, ask for confirmation, and fail if not granted. The + * exception are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap + * for prompting if necessary. - kw + */ + text = (HText *) HTAnchor_document(anchor); + if (LYinternal_flag && !text && !LYforce_no_cache && + anchor->post_data && !anchor->safe && + !isLYNXIMGMAP(full_address) && + HTConfirm(gettext("Document with POST content not found in cache. Resubmit?")) + != TRUE) { + return NO; + } + + /* + * If we don't have POST content, check whether this is a previous + * redirecting URL, and keep re-checking until we get to the final + * destination or redirection limit. If we do have POST content, we didn't + * allow permanent redirection, and an interactive user will be deciding + * whether to keep redirecting. - FM + */ + if (!anchor->post_data) { + while ((cp = HTAnchor_physical(anchor)) != NULL && + !StrNCmp(cp, "Location=", 9)) { + DocAddress NewDoc; + + CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n", + anchor->address)); + CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp + 9)); + + /* + * Don't exceed the redirection_attempts limit. - FM + */ + if (++redirection_attempts > 10) { + HTAlert(TOO_MANY_REDIRECTIONS); + redirection_attempts = 0; + FREE(use_this_url_instead); + return NO; + } + + /* + * Set up the redirection. - FM + */ + StrAllocCopy(use_this_url_instead, cp + 9); + NewDoc.address = use_this_url_instead; + NewDoc.post_data = NULL; + NewDoc.post_content_type = NULL; + NewDoc.bookmark = anchor->bookmark; + NewDoc.isHEAD = anchor->isHEAD; + NewDoc.safe = anchor->safe; + anchor = HTAnchor_findAddress(&NewDoc); + } + } + /* + * If we had previous redirection, go back and check out that the URL under + * the current restrictions. - FM + */ + if (use_this_url_instead) { + FREE(redirecting_url); + return (NO); + } + + /* + * See if we can use an already loaded document. + */ + text = (HText *) HTAnchor_document(anchor); + if (text && !LYforce_no_cache) { + /* + * We have a cached rendition of the target document. Check if it's OK + * to re-use it. We consider it OK if: + * (1) the anchor does not have the no_cache element set, or + * (2) we've overridden it, e.g., because we are acting on a PREV_DOC + * command or a link in the History Page and it's not a reply from a + * POST with the LYresubmit_posts flag set, or + * (3) we are repositioning within the currently loaded document based + * on the target anchor's address (URL_Reference). + * + * If DONT_TRACK_INTERNAL_LINKS is defined, HText_AreDifferent() is + * used to determine whether (3) applies. If the target address + * differs from that of the current document only by a fragment and the + * target address has an appended fragment, repositioning without + * reloading is always assumed. Note that HText_AreDifferent() + * currently always returns TRUE if the target has a LYNXIMGMAP URL, so + * that an internally generated pseudo-document will normally not be + * re-used unless condition (2) applies. (Condition (1) cannot apply + * since in LYMap.c, no_cache is always set in the anchor object). + * This doesn't guarantee that the resource from which the MAP element + * is taken will be read again (reloaded) when the list of links for a + * client-side image map is regenerated, when in some cases it should + * (e.g., user requested RELOAD, or HTTP response with no-cache header + * and we are not overriding). + * + * If DONT_TRACK_INTERNAL_LINKS is undefined, a target address that + * points to the same URL as the current document may still result in + * reloading, depending on whether the original URL-Reference was given + * as an internal link in the context of the previously loaded + * document. HText_AreDifferent() is not used here for testing whether + * we are just repositioning. For an internal link, the potential + * callers of this function from mainloop() down will either avoid + * making the call (and do the repositioning differently) or set + * LYinternal_flag (or LYoverride_no_cache). Note that (a) LYNXIMGMAP + * pseudo-documents and (b) The "List Page" document are treated + * logically as being part of the document on which they are based, for + * the purpose of whether to treat a link as internal, but the logic + * for this (by setting LYinternal_flag as necessary) is implemented + * elsewhere. There is a specific test for LYNXIMGMAP here so that the + * generated pseudo-document will not be re-used unless + * LYoverride_no_cache is set. The same caveat as above applies w.r.t. + * reloading of the underlying resource. + * + * We also should be checking other aspects of cache regulation (e.g., + * based on an If-Modified-Since check, etc.) but the code for doing + * those other things isn't available yet. + */ + if (!reloading && + (LYoverride_no_cache || +#ifdef DONT_TRACK_INTERNAL_LINKS + !HText_hasNoCacheSet(text) || + !HText_AreDifferent(anchor, full_address) +#else + ((LYinternal_flag || !HText_hasNoCacheSet(text)) && + !isLYNXIMGMAP(full_address)) +#endif /* TRACK_INTERNAL_LINKS */ + )) { + CTRACE((tfp, "HTAccess: Document already in memory.\n")); + HText_select(text); + +#ifdef DIRED_SUPPORT + if (HTAnchor_format(anchor) == WWW_DIRED) + lynx_edit_mode = TRUE; +#endif + redirection_attempts = 0; + return YES; + } else { + ForcingNoCache = YES; + BStrFree(anchor->post_data); + CTRACE((tfp, "HTAccess: Auto-reloading document.\n")); + } + } + + if (HText_HaveUserChangedForms(text)) { + /* + * Issue a warning. User forms content will be lost. + * Will not restore changed forms, currently. + */ + HTAlert(RELOADING_FORM); + } + + /* + * Get the document from the net. If we are auto-reloading, the mutable + * anchor elements from the previous rendition should be freed in + * conjunction with loading of the new rendition. - FM + */ + LYforce_no_cache = NO; /* reset after each time through */ + if (ForcingNoCache) { + FREE(anchor->title); /* ??? */ + } + status = HTLoad(address_to_load, anchor, format_out, sink); + CTRACE((tfp, "HTAccess: status=%d\n", status)); + + /* + * RECOVERY: if the loading failed, and we had a cached HText copy, and no + * new HText created - use a previous copy, issue a warning. + */ + if (text && status < 0 && (HText *) HTAnchor_document(anchor) == text) { + HTAlert(gettext("Loading failed, use a previous copy.")); + CTRACE((tfp, "HTAccess: Loading failed, use a previous copy.\n")); + HText_select(text); + +#ifdef DIRED_SUPPORT + if (HTAnchor_format(anchor) == WWW_DIRED) + lynx_edit_mode = TRUE; +#endif + redirection_attempts = 0; + return YES; + } + + /* + * Log the access if necessary. + */ + if (HTlogfile) { + time_t theTime; + + time(&theTime); + fprintf(HTlogfile, "%24.24s %s %s %s\n", + ctime(&theTime), + HTClientHost ? HTClientHost : "local", + status < 0 ? "FAIL" : "GET", + full_address); + fflush(HTlogfile); /* Actually update it on disk */ + CTRACE((tfp, "Log: %24.24s %s %s %s\n", + ctime(&theTime), + HTClientHost ? HTClientHost : "local", + status < 0 ? "FAIL" : "GET", + full_address)); + } + + /* + * Check out what we received from the net. + */ + if (status == HT_REDIRECTING) { + /* Exported from HTMIME.c, of all places. */ + /* NO!! - FM */ + /* + * Doing this via HTMIME.c meant that the redirection cover page was + * already loaded before we learned that we want a different URL. + * Also, changing anchor->address, as Lynx was doing, meant we could + * never again access its hash table entry, creating an insolvable + * memory leak. Instead, if we had a 301 status and set + * permanent_redirection, we'll load the new URL in anchor->physical, + * preceded by a token, which we can check to make replacements on + * subsequent access attempts. We'll check recursively, and retrieve + * the final URL if we had multiple redirections to it. If we just + * went to HTLoad now, as Lou originally had this, we couldn't do + * Lynx's security checks and alternate handling of some URL types. + * So, instead, we'll go all the way back to the top of getfile in + * LYGetFile.c when the status is HT_REDIRECTING. This may seem + * bizarre, but it works like a charm! - FM + * + * Actually, the location header for redirections is now again picked + * up in HTMIME.c. But that's an internal matter between HTTP.c and + * HTMIME.c, is still under control of HTLoadHTTP for http URLs, is + * done in a way that doesn't load the redirection response's body + * (except when wanted as an error fallback), and thus need not concern + * us here. - kw 1999-12-02 + */ + CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n", + address_to_load)); + CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", + redirecting_url)); + /* + * Prevent circular references. + */ + if (strcmp(address_to_load, redirecting_url)) { /* if different */ + /* + * Load token and redirecting url into anchor->physical if we had + * 301 Permanent redirection. HTTP.c does not allow this if we + * have POST content. - FM + */ + if (permanent_redirection) { + StrAllocCopy(anchor->physical, "Location="); + StrAllocCat(anchor->physical, redirecting_url); + } + + /* + * Set up flags before return to getfile. - FM + */ + StrAllocCopy(use_this_url_instead, redirecting_url); + if (ForcingNoCache) + LYforce_no_cache = YES; + ++redirection_attempts; + FREE(redirecting_url); + permanent_redirection = FALSE; + return (NO); + } + ++redirection_attempts; + FREE(redirecting_url); + permanent_redirection = FALSE; + return (YES); + } + + /* + * We did not receive a redirecting URL. - FM + */ + redirection_attempts = 0; + FREE(redirecting_url); + permanent_redirection = FALSE; + + if (status == HT_LOADED) { + CTRACE((tfp, "HTAccess: `%s' has been accessed.\n", + full_address)); + return YES; + } + if (status == HT_PARTIAL_CONTENT) { + HTAlert(gettext("Loading incomplete.")); + CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n", + full_address)); + return YES; + } + + if (status == HT_NO_DATA) { + CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n", + full_address)); + return NO; + } + + if (status == HT_NOT_LOADED) { + CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n", + full_address)); + return NO; + } + + if (status == HT_INTERRUPTED) { + CTRACE((tfp, + "HTAccess: `%s' has been accessed, transfer interrupted.\n", + full_address)); + return NO; + } + + if (status > 0) { + /* + * If you get this, then please find which routine is returning a + * positive unrecognized error code! + */ + fprintf(stderr, + gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n")); + fprintf(stderr, + gettext("**** HTAccess: Internal software error. Please mail lynx-dev@nongnu.org!\n")); + fprintf(stderr, gettext("**** HTAccess: Status returned was: %d\n"), status); + exit_immediately(EXIT_FAILURE); + } + + /* Failure in accessing a document */ + cp = NULL; + StrAllocCopy(cp, gettext("Can't Access")); + StrAllocCat(cp, " `"); + StrAllocCat(cp, full_address); + StrAllocCat(cp, "'"); + _HTProgress(cp); + FREE(cp); + + CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address)); + HTLoadError(sink, 500, gettext("Unable to access document.")); + return NO; +} /* HTLoadDocument */ + +/* Load a document from absolute name. HTLoadAbsolute() + * ----------------------------------- + * + * On Entry, + * addr The absolute address of the document to be accessed. + * filter if YES, treat document as HTML + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadAbsolute(const DocAddress *docaddr) +{ + return HTLoadDocument(docaddr->address, + HTAnchor_findAddress(docaddr), + (HTOutputFormat ? HTOutputFormat : WWW_PRESENT), + HTOutputStream); +} + +#ifdef NOT_USED_CODE +/* Load a document from absolute name to stream. HTLoadToStream() + * --------------------------------------------- + * + * On Entry, + * addr The absolute address of the document to be accessed. + * sink if non-NULL, send data down this stream + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadToStream(const char *addr, + BOOL filter, + HTStream *sink) +{ + return HTLoadDocument(addr, + HTAnchor_findSimpleAddress(addr), + (HTOutputFormat ? HTOutputFormat : WWW_PRESENT), + sink); +} +#endif /* NOT_USED_CODE */ + +/* Load a document from relative name. HTLoadRelative() + * ----------------------------------- + * + * On Entry, + * relative_name The relative address of the document + * to be accessed. + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadRelative(const char *relative_name, + HTParentAnchor *here) +{ + DocAddress full_address; + BOOL result; + char *mycopy = NULL; + char *stripped = NULL; + + full_address.address = NULL; + full_address.post_data = NULL; + full_address.post_content_type = NULL; + full_address.bookmark = NULL; + full_address.isHEAD = FALSE; + full_address.safe = FALSE; + + StrAllocCopy(mycopy, relative_name); + + stripped = HTStrip(mycopy); + full_address.address = + HTParse(stripped, + here->address, + PARSE_ALL_WITHOUT_ANCHOR); + result = HTLoadAbsolute(&full_address); + /* + * If we got redirection, result will be NO, but use_this_url_instead will + * be set. The calling routine should check both and do whatever is + * appropriate. - FM + */ + FREE(full_address.address); + FREE(mycopy); /* Memory leak fixed 10/7/92 -- JFG */ + return result; +} + +/* Load if necessary, and select an anchor. HTLoadAnchor() + * ---------------------------------------- + * + * On Entry, + * destination The child or parent anchor to be loaded. + * + * On Exit, + * returns YES Success + * NO Failure + */ +BOOL HTLoadAnchor(HTAnchor * destination) +{ + HTParentAnchor *parent; + BOOL loaded = NO; + + if (!destination) + return NO; /* No link */ + + parent = HTAnchor_parent(destination); + + if (HTAnchor_document(parent) == NULL) { /* If not already loaded */ + /* TBL 921202 */ + BOOL result; + + result = HTLoadDocument(parent->address, + parent, + HTOutputFormat ? + HTOutputFormat : WWW_PRESENT, + HTOutputStream); + if (!result) + return NO; + loaded = YES; + } { + HText *text = (HText *) HTAnchor_document(parent); + + if ((destination != (HTAnchor *) parent) && + (destination != (HTAnchor *) (parent->parent))) { + /* If child anchor */ + HText_selectAnchor(text, /* Double display? @@ */ + (HTChildAnchor *) destination); + } else { + if (!loaded) + HText_select(text); + } + } + return YES; + +} /* HTLoadAnchor */ + +/* Search. HTSearch() + * ------- + * + * Performs a keyword search on word given by the user. Adds the + * keyword to the end of the current address and attempts to open + * the new address. + * + * On Entry, + * *keywords space-separated keyword list or similar search list + * here is anchor search is to be done on. + */ +static char hex(int i) +{ + const char *hexchars = "0123456789ABCDEF"; + + return hexchars[i]; +} + +BOOL HTSearch(const char *keywords, + HTParentAnchor *here) +{ +#define acceptable \ +"1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_" + + char *q, *u; + const char *p, *s, *e; /* Pointers into keywords */ + char *address = NULL; + BOOL result; + char *escaped = typecallocn(char, (strlen(keywords) * 3) + 1); + static const BOOL isAcceptable[96] = + /* *INDENT-OFF* */ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */ + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */ + 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */ + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */ + 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */ + /* *INDENT-ON* */ + + if (escaped == NULL) + outofmem(__FILE__, "HTSearch"); + + assert(escaped != NULL); + + StrAllocCopy(address, here->isIndexAction); + + /* + * Convert spaces to + and hex escape unacceptable characters. + */ + for (s = keywords; *s && WHITE(*s); s++) /* Scan */ + ; /* Skip white space */ + for (e = s + strlen(s); e > s && WHITE(*(e - 1)); e--) /* Scan */ + ; /* Skip trailers */ + for (q = escaped, p = s; p < e; p++) { /* Scan stripped field */ + unsigned char c = UCH(TOASCII(*p)); + + if (WHITE(*p)) { + *q++ = '+'; + } else if (IS_CJK_TTY) { + *q++ = *p; + } else if (c >= 32 && c <= UCH(127) && isAcceptable[c - 32]) { + *q++ = *p; /* 930706 TBL for MVS bug */ + } else { + *q++ = '%'; + *q++ = hex((int) (c >> 4)); + *q++ = hex((int) (c & 15)); + } + } /* Loop over string */ + *q = '\0'; /* Terminate escaped string */ + u = strchr(address, '?'); /* Find old search string */ + if (u != NULL) + *u = '\0'; /* Chop old search off */ + + StrAllocCat(address, "?"); + StrAllocCat(address, escaped); + FREE(escaped); + result = HTLoadRelative(address, here); + FREE(address); + + /* + * If we got redirection, result will be NO, but use_this_url_instead will + * be set. The calling routine should check both and do whatever is + * appropriate. Only an http server (not a gopher or wais server) could + * return redirection. Lynx will go all the way back to its mainloop() and + * subject a redirecting URL to all of its security and restrictions + * checks. - FM + */ + return result; +} + +/* Search Given Indexname. HTSearchAbsolute() + * ----------------------- + * + * Performs a keyword search on word given by the user. Adds the + * keyword to the end of the current address and attempts to open + * the new address. + * + * On Entry, + * *keywords space-separated keyword list or similar search list + * *indexname is name of object search is to be done on. + */ +BOOL HTSearchAbsolute(const char *keywords, + char *indexname) +{ + DocAddress abs_doc; + HTParentAnchor *anchor; + + abs_doc.address = indexname; + abs_doc.post_data = NULL; + abs_doc.post_content_type = NULL; + abs_doc.bookmark = NULL; + abs_doc.isHEAD = FALSE; + abs_doc.safe = FALSE; + + anchor = HTAnchor_findAddress(&abs_doc); + return HTSearch(keywords, anchor); +} + +#ifdef NOT_USED_CODE +/* Generate the anchor for the home page. HTHomeAnchor() + * -------------------------------------- + * + * As it involves file access, this should only be done once + * when the program first runs. + * This is a default algorithm -- browser don't HAVE to use this. + * But consistency between browsers is STRONGLY recommended! + * + * Priority order is: + * 1 WWW_HOME environment variable (logical name, etc) + * 2 ~/WWW/default.html + * 3 /usr/local/bin/default.html + * 4 http://www.w3.org/default.html + */ +HTParentAnchor *HTHomeAnchor(void) +{ + char *my_home_document = NULL; + char *home = LYGetEnv(LOGICAL_DEFAULT); + char *ref; + HTParentAnchor *anchor; + + if (home) { + StrAllocCopy(my_home_document, home); +#define MAX_FILE_NAME 1024 /* @@@ */ + } else if (HTClientHost) { /* Telnet server */ + /* + * Someone telnets in, they get a special home. + */ + FILE *fp = fopen(REMOTE_POINTER, "r"); + char *status; + + if (fp) { + my_home_document = typecallocn(char, MAX_FILE_NAME); + + if (my_home_document == NULL) + outofmem(__FILE__, "HTHomeAnchor"); + status = fgets(my_home_document, MAX_FILE_NAME, fp); + if (!status) { + FREE(my_home_document); + } + fclose(fp); + } + if (my_home_document == NULL) + StrAllocCopy(my_home_document, REMOTE_ADDRESS); + } +#ifdef UNIX + if (my_home_document == NULL) { + FILE *fp = NULL; + char *home = LYGetEnv("HOME"); + + if (home != 0) { + HTSprintf0(&my_home_document, "%s/%s", home, PERSONAL_DEFAULT); + fp = fopen(my_home_document, "r"); + } + + if (!fp) { + StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE); + fp = fopen(my_home_document, "r"); + } + if (fp) { + fclose(fp); + } else { + CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n", + PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE)); + FREE(my_home_document); + } + } +#endif /* UNIX */ + ref = HTParse((my_home_document ? + my_home_document : (HTClientHost ? + REMOTE_ADDRESS : LAST_RESORT)), + STR_FILE_URL, + PARSE_ALL_WITHOUT_ANCHOR); + if (my_home_document) { + CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n", + my_home_document, ref)); + FREE(my_home_document); + } + anchor = HTAnchor_findSimpleAddress(ref); + FREE(ref); + return anchor; +} +#endif /* NOT_USED_CODE */ |