/* Access Manager HTAccess.c ** ============== ** ** Authors ** TBL Tim Berners-Lee timbl@info.cern.ch ** JFG Jean-Francois Groff jfg@dxcern.cern.ch ** DD Denis DeLaRoca (310) 825-4580 ** FM Foteos Macrides macrides@sci.wfeb.edu ** PDM Danny Mayer mayer@ljo.dec.com ** ** History ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL ** 17 Dec 92 Tn3270 added, bug fix. DD ** 4 Feb 93 Access registration, Search escapes bad chars TBL ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED ** 28 May 93 WAIS gateway explicit if no WAIS library linked in. ** 31 May 94 Added DIRECT_WAIS support for VMS. FM ** 27 Jan 95 Fixed proxy support to use NNTPSERVER for checking ** whether or not to use the proxy server. PDM ** 27 Jan 95 Ensured that proxy service will be overridden for files ** on the local host (because HTLoadFile() doesn't try ftp ** for those) and will substitute ftp for remote files. FM ** 28 Jan 95 Tweeked PDM's proxy override mods to handle port info ** for news and wais URL's. FM ** ** Bugs ** This module assumes that that the graphic object is hypertext, as it ** needs to select it when it has been loaded. A superclass needs to be ** defined which accepts select and select_anchor. */ #ifdef VMS #define DIRECT_WAIS #endif /* VMS */ #include #include #include /* ** Implements: */ #include /* ** Uses: */ #include #include /* SCW */ #ifndef NO_RULES #include #endif #include #include /* See bugs above */ #include #include #include #include #include #include #include extern HTCJKlang HTCJK; /* ** These flags may be set to modify the operation of this module */ PUBLIC char * HTClientHost = NULL; /* Name of remote login host if any */ PUBLIC FILE * HTlogfile = NULL; /* File to which to output one-liners */ PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? 
*/ PUBLIC BOOL HTPermitRedir = NO; /* Always allow redirection in getfile()? */ PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */ /* ** To generate other things, play with these: */ PUBLIC HTFormat HTOutputFormat = NULL; PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */ PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */ PUBLIC char *use_this_url_instead = NULL; PRIVATE int pushed_assume_LYhndl = -1; /* see LYUC* functions below - kw */ PRIVATE char * pushed_assume_MIMEname = NULL; #ifdef LY_FIND_LEAKS PRIVATE void free_protocols NOARGS { HTList_delete(protocols); protocols = NULL; FREE(pushed_assume_MIMEname); /* shouldn't happen, just in case - kw */ } #endif /* LY_FIND_LEAKS */ /* Register a Protocol. HTRegisterProtocol() ** -------------------- */ PUBLIC BOOL HTRegisterProtocol ARGS1( HTProtocol *, protocol) { if (!protocols) { protocols = HTList_new(); #ifdef LY_FIND_LEAKS atexit(free_protocols); #endif } HTList_addObject(protocols, protocol); return YES; } /* Register all known protocols. HTAccessInit() ** ----------------------------- ** ** Add to or subtract from this list if you add or remove protocol ** modules. This routine is called the first time the protocol list ** is needed, unless any protocols are already registered, in which ** case it is not called. Therefore the application can override ** this list. ** ** Compiling with NO_INIT prevents all known protocols from being ** forced in at link time. 
*/ #ifndef NO_INIT #ifdef GLOBALREF_IS_MACRO extern GLOBALREF (HTProtocol, HTTP); extern GLOBALREF (HTProtocol, HTTPS); extern GLOBALREF (HTProtocol, HTFile); extern GLOBALREF (HTProtocol, HTTelnet); extern GLOBALREF (HTProtocol, HTTn3270); extern GLOBALREF (HTProtocol, HTRlogin); #ifndef DECNET #ifndef DISABLE_FTP extern GLOBALREF (HTProtocol, HTFTP); #endif /* DISABLE_FTP */ #ifndef DISABLE_NEWS extern GLOBALREF (HTProtocol, HTNews); extern GLOBALREF (HTProtocol, HTNNTP); extern GLOBALREF (HTProtocol, HTNewsPost); extern GLOBALREF (HTProtocol, HTNewsReply); extern GLOBALREF (HTProtocol, HTSNews); extern GLOBALREF (HTProtocol, HTSNewsPost); extern GLOBALREF (HTProtocol, HTSNewsReply); #endif /* not DISABLE_NEWS */ #ifndef DISABLE_GOPHER extern GLOBALREF (HTProtocol, HTGopher); extern GLOBALREF (HTProtocol, HTCSO); #endif /* not DISABLE_GOPHER */ #ifndef DISABLE_FINGER extern GLOBALREF (HTProtocol, HTFinger); #endif /* not DISABLE_FINGER */ #ifdef DIRECT_WAIS extern GLOBALREF (HTProtocol, HTWAIS); #endif /* DIRECT_WAIS */ #endif /* !DECNET */ #else GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin; #ifndef DECNET #ifndef DISABLE_FTP GLOBALREF HTProtocol HTFTP; #endif /* DISABLE_FTP */ #ifndef DISABLE_NEWS GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply; GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply; #endif /* not DISABLE_NEWS */ #ifndef DISABLE_GOPHER GLOBALREF HTProtocol HTGopher, HTCSO; #endif /* not DISABLE_GOPHER */ #ifndef DISABLE_FINGER GLOBALREF HTProtocol HTFinger; #endif /* not DISABLE_FINGER */ #ifdef DIRECT_WAIS GLOBALREF HTProtocol HTWAIS; #endif /* DIRECT_WAIS */ #endif /* !DECNET */ #endif /* GLOBALREF_IS_MACRO */ PRIVATE void HTAccessInit NOARGS /* Call me once */ { HTRegisterProtocol(&HTTP); HTRegisterProtocol(&HTTPS); HTRegisterProtocol(&HTFile); HTRegisterProtocol(&HTTelnet); HTRegisterProtocol(&HTTn3270); HTRegisterProtocol(&HTRlogin); #ifndef DECNET #ifndef DISABLE_FTP HTRegisterProtocol(&HTFTP); 
#endif /* DISABLE_FTP */ #ifndef DISABLE_NEWS HTRegisterProtocol(&HTNews); HTRegisterProtocol(&HTNNTP); HTRegisterProtocol(&HTNewsPost); HTRegisterProtocol(&HTNewsReply); HTRegisterProtocol(&HTSNews); HTRegisterProtocol(&HTSNewsPost); HTRegisterProtocol(&HTSNewsReply); #endif /* not DISABLE_NEWS */ #ifndef DISABLE_GOPHER HTRegisterProtocol(&HTGopher); HTRegisterProtocol(&HTCSO); #endif /* not DISABLE_GOPHER */ #ifndef DISABLE_FINGER HTRegisterProtocol(&HTFinger); #endif /* not DISABLE_FINGER */ #ifdef DIRECT_WAIS HTRegisterProtocol(&HTWAIS); #endif /* DIRECT_WAIS */ #endif /* !DECNET */ LYRegisterLynxProtocols(); } #endif /* !NO_INIT */ /* Check for proxy override. override_proxy() ** ------------------------- ** ** Check the no_proxy environment variable to get the list ** of hosts for which proxy server is not consulted. ** ** no_proxy is a comma- or space-separated list of machine ** or domain names, with optional :port part. If no :port ** part is present, it applies to all ports on that domain. ** ** Example: ** no_proxy="cern.ch,some.domain:8001" ** ** Use "*" to override all proxy service: ** no_proxy="*" */ PUBLIC BOOL override_proxy ARGS1( CONST char *, addr) { CONST char * no_proxy = getenv("no_proxy"); char * p = NULL; char * at = NULL; char * host = NULL; char * Host = NULL; char * acc_method = NULL; int port = 0; int h_len = 0; /* * Check for global override. */ if (no_proxy) { if (!strcmp(no_proxy, "*")) return YES; } /* * Never proxy file:// URLs if they are on the local host. * HTLoadFile() will not attempt ftp for those if direct * access fails. We'll check that first, in case no_proxy * hasn't been defined. - FM */ if (!addr) return NO; if (!(host = HTParse(addr, "", PARSE_HOST))) return NO; if (!*host) { FREE(host); return NO; } Host = (((at = strchr(host, '@')) != NULL) ? 
(at+1) : host); if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) { if (!strcmp("file", acc_method) && (!strcmp(Host, "localhost") || #ifdef VMS !strcasecomp(Host, HTHostName()) #else !strcmp(Host, HTHostName()) #endif /* VMS */ )) { FREE(host); FREE(acc_method); return YES; } FREE(acc_method); } if (!no_proxy) { FREE(host); return NO; } if (NULL != (p = strrchr(Host, ':'))) { /* Port specified */ *p++ = 0; /* Chop off port */ port = atoi(p); } else { /* Use default port */ acc_method = HTParse(addr, "", PARSE_ACCESS); if (acc_method != NULL) { if (!strcmp(acc_method, "http")) port = 80; else if (!strcmp(acc_method, "https")) port = 443; else if (!strcmp(acc_method, "ftp")) port = 21; #ifndef DISABLE_GOPHER else if (!strcmp(acc_method, "gopher")) port = 70; #endif else if (!strcmp(acc_method, "cso")) port = 105; #ifndef DISABLE_NEWS else if (!strcmp(acc_method, "news")) port = 119; else if (!strcmp(acc_method, "nntp")) port = 119; else if (!strcmp(acc_method, "newspost")) port = 119; else if (!strcmp(acc_method, "newsreply")) port = 119; else if (!strcmp(acc_method, "snews")) port = 563; else if (!strcmp(acc_method, "snewspost")) port = 563; else if (!strcmp(acc_method, "snewsreply")) port = 563; #endif else if (!strcmp(acc_method, "wais")) port = 210; #ifndef DISABLE_FINGER else if (!strcmp(acc_method, "finger")) port = 79; #endif else if (!strcmp(acc_method, "telnet")) port = 23; else if (!strcmp(acc_method, "tn3270")) port = 23; else if (!strcmp(acc_method, "rlogin")) port = 513; FREE(acc_method); } } if (!port) port = 80; /* Default */ h_len = strlen(Host); while (*no_proxy) { CONST char * end; CONST char * colon = NULL; int templ_port = 0; int t_len; while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ',')) no_proxy++; /* Skip whitespace and separators */ end = no_proxy; while (*end && !WHITE(*end) && *end != ',') { /* Find separator */ if (*end == ':') colon = end; /* Port number given */ end++; } if (colon) { templ_port = atoi(colon+1); t_len = colon - 
no_proxy; } else { t_len = end - no_proxy; } if ((!templ_port || templ_port == port) && (t_len > 0 && t_len <= h_len && !strncasecomp(Host + h_len - t_len, no_proxy, t_len))) { FREE(host); return YES; } #ifdef CJK_EX /* ASATAKU PROXY HACK */ if ((!templ_port || templ_port == port) && (t_len > 0 && t_len <= h_len && isdigit(*no_proxy) && !strncmp(host, no_proxy, t_len))) { FREE(host); return YES; } #endif /* ASATAKU PROXY HACK */ if (*end) no_proxy = (end + 1); else break; } FREE(host); return NO; } /* Find physical name and access protocol get_physical() ** -------------------------------------- ** ** On entry, ** addr must point to the fully qualified hypertext reference. ** anchor a parent anchor with whose address is addr ** ** On exit, ** returns HT_NO_ACCESS Error has occurred. ** HT_OK Success */ PRIVATE int get_physical ARGS2( CONST char *, addr, HTParentAnchor *, anchor) { char * acc_method = NULL; /* Name of access method */ char * physical = NULL; char * Server_addr = NULL; BOOL override_flag = NO; /* ** Make sure the using_proxy variable is FALSE. 
*/ using_proxy = NO; #ifndef NO_RULES physical = HTTranslate(addr); if (!physical) { if (redirecting_url) { return HT_REDIRECTING; } return HT_FORBIDDEN; } if (anchor->isISMAPScript == TRUE) { StrAllocCat(physical, "?0,0"); CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); } if (!strncmp(physical, "Proxied=", 8)) { HTAnchor_setPhysical(anchor, physical + 8); using_proxy = YES; } else if (!strncmp(physical, "NoProxy=", 8)) { HTAnchor_setPhysical(anchor, physical + 8); override_flag = YES; } else { HTAnchor_setPhysical(anchor, physical); } FREE(physical); /* free our copy */ #else if (anchor->isISMAPScript == TRUE) { StrAllocCopy(physical, addr); StrAllocCat(physical, "?0,0"); CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); HTAnchor_setPhysical(anchor, physical); FREE(physical); /* free our copy */ } else { HTAnchor_setPhysical(anchor, addr); } #endif /* NO_RULES */ acc_method = HTParse(HTAnchor_physical(anchor), "file:", PARSE_ACCESS); /* ** Check whether gateway access has been set up for this. ** ** This function can be replaced by the rule system above. ** ** If the rule system has already determined that we should ** use a proxy, or that we shouldn't, ignore proxy-related ** settings, don't use no_proxy either. */ #define USE_GATEWAYS #ifdef USE_GATEWAYS if (!override_flag && !using_proxy) { /* else ignore no_proxy env var */ if (!strcasecomp(acc_method, "news")) { /* ** News is different, so we need to check the name of the server, ** as well as the default port for selective exclusions. 
*/ char *host = NULL; if ((host = HTParse(addr, "", PARSE_HOST))) { if (strchr(host, ':') == NULL) { StrAllocCopy(Server_addr, "news://"); StrAllocCat(Server_addr, host); StrAllocCat(Server_addr, ":119/"); } FREE(host); } else if (getenv("NNTPSERVER") != NULL) { StrAllocCopy(Server_addr, "news://"); StrAllocCat(Server_addr, (char *)getenv("NNTPSERVER")); StrAllocCat(Server_addr, ":119/"); } } else if (!strcasecomp(acc_method, "wais")) { /* ** Wais also needs checking of the default port ** for selective exclusions. */ char *host = NULL; if ((host = HTParse(addr, "", PARSE_HOST))) { if (!(strchr(host, ':'))) { StrAllocCopy(Server_addr, "wais://"); StrAllocCat(Server_addr, host); StrAllocCat(Server_addr, ":210/"); } FREE(host); } else StrAllocCopy(Server_addr, addr); } else { StrAllocCopy(Server_addr, addr); } override_flag = override_proxy(Server_addr); } if (!override_flag && !using_proxy) { char * gateway_parameter, *gateway, *proxy; /* ** Search for gateways. */ gateway_parameter = (char *)calloc(1, (strlen(acc_method) + 20)); if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad"); strcpy(gateway_parameter, "WWW_"); strcat(gateway_parameter, acc_method); strcat(gateway_parameter, "_GATEWAY"); gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */ /* ** Search for proxy servers. */ if (!strcmp(acc_method, "file")) /* ** If we got to here, a file URL is for ftp on a remote host. - FM */ strcpy(gateway_parameter, "ftp"); else strcpy(gateway_parameter, acc_method); strcat(gateway_parameter, "_proxy"); proxy = (char *)getenv(gateway_parameter); FREE(gateway_parameter); if (gateway) CTRACE((tfp, "Gateway found: %s\n", gateway)); if (proxy) CTRACE((tfp, "proxy server found: %s\n", proxy)); /* ** Proxy servers have precedence over gateway servers. 
*/ if (proxy) { char * gatewayed = NULL; StrAllocCopy(gatewayed,proxy); if (!strncmp(gatewayed, "http", 4)) { char *cp = strrchr(gatewayed, '/'); /* Append a slash to the proxy specification if it doesn't * end in one but otherwise looks normal (starts with "http", * has no '/' other than ones before the hostname). - kw */ if (cp && (cp - gatewayed) <= 7) LYAddHtmlSep(&gatewayed); } /* ** Ensure that the proxy server uses ftp for file URLs. - FM */ if (!strncmp(addr, "file", 4)) { StrAllocCat(gatewayed, "ftp"); StrAllocCat(gatewayed, (addr + 4)); } else StrAllocCat(gatewayed, addr); using_proxy = YES; if (anchor->isISMAPScript == TRUE) StrAllocCat(gatewayed, "?0,0"); HTAnchor_setPhysical(anchor, gatewayed); FREE(gatewayed); FREE(acc_method); acc_method = HTParse(HTAnchor_physical(anchor), "http:", PARSE_ACCESS); } else if (gateway) { char * path = HTParse(addr, "", PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION); /* Chop leading / off to make host into part of path */ char * gatewayed = HTParse(path+1, gateway, PARSE_ALL); FREE(path); HTAnchor_setPhysical(anchor, gatewayed); FREE(gatewayed); FREE(acc_method); acc_method = HTParse(HTAnchor_physical(anchor), "http:", PARSE_ACCESS); } } FREE(Server_addr); #endif /* use gateways */ /* ** Search registered protocols to find suitable one. */ { int i, n; #ifndef NO_INIT if (!protocols) HTAccessInit(); #endif n = HTList_count(protocols); for (i = 0; i < n; i++) { HTProtocol *p = (HTProtocol *)HTList_objectAt(protocols, i); if (!strcmp(p->name, acc_method)) { HTAnchor_setProtocol(anchor, p); FREE(acc_method); return (HT_OK); } } } FREE(acc_method); return HT_NO_ACCESS; } /* * Temporarily set the int UCLYhndl_for_unspec and string * UCLYhndl_for_unspec used for charset "assuming" to the values * implied by a HTParentAnchor's UCStages, after saving the current * values for later restoration. - kw * @@@ These functions may not really belong here, but where else? 
* I want the "pop" to occur as soon as possible after loading * has finished. - kw @@@ */ extern char*UCAssume_MIMEcharset; PUBLIC void LYUCPushAssumed ARGS1( HTParentAnchor *, anchor) { int anchor_LYhndl = -1; LYUCcharset * anchor_UCI = NULL; if (anchor) { anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); if (anchor_LYhndl >= 0) anchor_UCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); if (anchor_UCI && anchor_UCI->MIMEname) { pushed_assume_MIMEname = UCAssume_MIMEcharset; UCAssume_MIMEcharset = NULL; StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname); pushed_assume_LYhndl = anchor_LYhndl; /* some diagnostics */ if (UCLYhndl_for_unspec != anchor_LYhndl) CTRACE((tfp, "LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n", UCLYhndl_for_unspec, anchor_LYhndl)); UCLYhndl_for_unspec = anchor_LYhndl; return; } } pushed_assume_LYhndl = -1; FREE(pushed_assume_MIMEname); } /* * Restore the int UCLYhndl_for_unspec and string * UCLYhndl_for_unspec used for charset "assuming" from the values * saved by LYUCPushAssumed, if any. - kw */ PUBLIC int LYUCPopAssumed NOARGS { if (pushed_assume_LYhndl >= 0) { /* some diagnostics */ if (UCLYhndl_for_unspec != pushed_assume_LYhndl) CTRACE((tfp, "LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n", UCLYhndl_for_unspec, pushed_assume_LYhndl)); UCLYhndl_for_unspec = pushed_assume_LYhndl; pushed_assume_LYhndl = -1; FREE(UCAssume_MIMEcharset); UCAssume_MIMEcharset = pushed_assume_MIMEname; pushed_assume_MIMEname = NULL; return UCLYhndl_for_unspec; } return -1; } /* Load a document HTLoad() ** --------------- ** ** This is an internal routine, which has an address AND a matching ** anchor. (The public routines are called with one OR the other.) ** ** On entry, ** addr must point to the fully qualified hypertext reference. ** anchor a parent anchor with whose address is addr ** ** On exit, ** returns <0 Error has occurred. ** HT_LOADED Success ** HT_NO_DATA Success, but no document loaded. 
** (telnet session started etc) */ PRIVATE int HTLoad ARGS4( CONST char *, addr, HTParentAnchor *, anchor, HTFormat, format_out, HTStream *, sink) { HTProtocol *p; int status = get_physical(addr, anchor); if (status == HT_FORBIDDEN) { /* prevent crash if telnet or similar was forbidden by rule. - kw */ LYFixCursesOn("show alert:"); return HTLoadError(sink, 500, gettext("Access forbidden by rule")); } else if (status == HT_REDIRECTING) { return status; /* fake redirection by rule, to redirecting_url */ } if (status < 0) return status; /* Can't resolve or forbidden */ /* prevent crash if telnet or similar mapped or proxied by rule. - kw */ LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor)); p = (HTProtocol *)HTAnchor_protocol(anchor); anchor->underway = TRUE; /* Hack to deal with caching */ status= (*(p->load))(HTAnchor_physical(anchor), anchor, format_out, sink); anchor->underway = FALSE; LYUCPopAssumed(); return status; } /* Get a save stream for a document HTSaveStream() ** -------------------------------- */ PUBLIC HTStream *HTSaveStream ARGS1( HTParentAnchor *, anchor) { HTProtocol *p = (HTProtocol *)HTAnchor_protocol(anchor); if (!p) return NULL; return (*p->saveStream)(anchor); } PUBLIC int redirection_attempts = 0; /* counter in HTLoadDocument */ /* Load a document - with logging etc HTLoadDocument() ** ---------------------------------- ** ** - Checks or documents already loaded ** - Logs the access ** - Allows stdin filter option ** - Trace ouput and error messages ** ** On Entry, ** anchor is the node_anchor for the document ** full_address The address of the document to be accessed. 
**        filter        if YES, treat stdin as HTML
**                      (stale: the function below has no such parameter)
**
**  On Exit,
**        returns    YES     Success in opening document
**                   NO      Failure
**
**  Note: NO is also returned when a redirection has to be followed; in
**  that case use_this_url_instead holds the new URL for the caller.
*/
PRIVATE BOOL HTLoadDocument ARGS4(
        CONST char *,           full_address,
        HTParentAnchor *,       anchor,
        HTFormat,               format_out,
        HTStream*,              sink)
{
    int status;
    HText * text;
    CONST char * address_to_load = full_address;
    char *cp;
    /* Remember the caller's reload request; LYforce_no_cache is reset below. */
    BOOL ForcingNoCache = LYforce_no_cache;

    CTRACE((tfp, "HTAccess: loading document %s\n", address_to_load));

    /*
    **  Free use_this_url_instead and reset permanent_redirection
    **  if not done elsewhere. - FM
    */
    FREE(use_this_url_instead);
    permanent_redirection = FALSE;

    /*
    **  Make sure some yoyo doesn't send us 'round in circles
    **  with redirecting URLs that point back to themselves.
    **  We'll set the original Lynx limit of 10 redirections
    **  per requested URL from a user, because the HTTP/1.1
    **  will no longer specify a restriction to 5, but will
    **  leave it up to the browser's discretion, in deference
    **  to Microsoft. - FM
    */
    if (redirection_attempts > 10) {
        redirection_attempts = 0;
        HTAlert(TOO_MANY_REDIRECTIONS);
        return NO;
    }

    /*
     *  If this is marked as an internal link but we don't have the
     *  document loaded any more, and we haven't explicitly flagged
     *  that we want to reload with LYforce_no_cache, then something
     *  has disappeared from the cache when we expected it to be still
     *  there.  The user probably doesn't expect a new network access.
     *  So if we have POST data and safe is not set in the anchor,
     *  ask for confirmation, and fail if not granted.  The exception
     *  are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap
     *  for prompting if necessary. - kw
     */
    if (LYinternal_flag && !LYforce_no_cache &&
        anchor->post_data && !anchor->safe &&
        (text = (HText *)HTAnchor_document(anchor)) == NULL &&
        strncmp(full_address, "LYNXIMGMAP:", 11) &&
        HTConfirm(gettext("Document with POST content not found in cache. Resubmit?"))
        != TRUE) {
        return NO;
    }

    /*
    **  If we don't have POST content, check whether this is a previous
    **  redirecting URL, and keep re-checking until we get to the final
    **  destination or redirection limit.  If we do have POST content,
    **  we didn't allow permanent redirection, and an interactive user
    **  will be deciding whether to keep redirecting. - FM
    */
    if (!anchor->post_data) {
        /*
        **  A physical address of the form "Location=<url>" is the token
        **  stored further below for a permanent redirection.
        */
        while ((cp = HTAnchor_physical(anchor)) != NULL &&
               !strncmp(cp, "Location=", 9)) {
            DocAddress NewDoc;

            CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
                    anchor->address));
            CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp+9));

            /*
            **  Don't exceed the redirection_attempts limit. - FM
            */
            if (++redirection_attempts > 10) {
                HTAlert(TOO_MANY_REDIRECTIONS);
                redirection_attempts = 0;
                FREE(use_this_url_instead);
                return NO;
            }

            /*
            **  Set up the redirection. - FM
            **/
            StrAllocCopy(use_this_url_instead, cp+9);
            NewDoc.address = use_this_url_instead;
            NewDoc.post_data = NULL;
            NewDoc.post_content_type = NULL;
            NewDoc.bookmark = anchor->bookmark;
            NewDoc.isHEAD = anchor->isHEAD;
            NewDoc.safe = anchor->safe;
            /* Continue the chain from the anchor of the target URL. */
            anchor = (HTParentAnchor *)HTAnchor_findAddress(&NewDoc);
        }
    }
    /*
    **  If we had previous redirection, go back and check out
    **  that the URL under the current restrictions. - FM
    */
    if (use_this_url_instead) {
        FREE(redirecting_url);
        return(NO);
    }

    /*
    **  See if we can use an already loaded document.
    */
    if (!LYforce_no_cache && (text = (HText *)HTAnchor_document(anchor))) {
        /*
        **  We have a cached rendition of the target document.
        **  Check if it's OK to re-use it.  We consider it OK if:
        **   (1) the anchor does not have the no_cache element set, or
        **   (2) we've overridden it, e.g., because we are acting on
        **       a PREV_DOC command or a link in the History Page and
        **       it's not a reply from a POST with the LYresubmit_posts
        **       flag set, or
        **   (3) we are repositioning within the currently loaded document
        **       based on the target anchor's address (URL_Reference).
         *
         *   If DONT_TRACK_INTERNAL_LINKS is defined, HText_AreDifferent()
         *   is used to determine whether (3) applies.  If the target address
         *   differs from that of the current document only by a fragment
         *   and the target address has an appended fragment, repositioning
         *   without reloading is always assumed.
         *   Note that HText_AreDifferent() currently always returns TRUE
         *   if the target has a LYNXIMGMAP URL, so that an internally
         *   generated pseudo-document will normally not be re-used unless
         *   condition (2) applies.  (Condition (1) cannot apply since in
         *   LYMap.c, no_cache is always set in the anchor object).  This
         *   doesn't guarantee that the resource from which the MAP element
         *   is taken will be read again (reloaded) when the list of links
         *   for a client-side image map is regenerated, when in some cases
         *   it should (e.g., user requested RELOAD, or HTTP response with
         *   no-cache header and we are not overriding).
         *
         *   If DONT_TRACK_INTERNAL_LINKS is undefined, a target address that
         *   points to the same URL as the current document may still result in
         *   reloading, depending on whether the original URL-Reference
         *   was given as an internal link in the context of the previously
         *   loaded document.  HText_AreDifferent() is not used here for
         *   testing whether we are just repositioning.  For an internal
         *   link, the potential callers of this function from mainloop()
         *   down will either avoid making the call (and do the repositioning
         *   differently) or set LYinternal_flag (or LYoverride_no_cache).
         *   Note that (a) LYNXIMGMAP pseudo-documents and (b) The "List Page"
         *   document are treated logically as being part of the document on
         *   which they are based, for the purpose of whether to treat a link
         *   as internal, but the logic for this (by setting LYinternal_flag
         *   as necessary) is implemented elsewhere.  There is a specific
         *   test for LYNXIMGMAP here so that the generated pseudo-document
         *   will not be re-used unless LYoverride_no_cache is set.  The same
         *   caveat as above applies w.r.t. reloading of the underlying
         *   resource.
         *
        **  We also should be checking other aspects of cache
        **  regulation (e.g., based on an If-Modified-Since check,
        **  etc.) but the code for doing those other things isn't
        **  available yet.
        */
#ifdef DONT_TRACK_INTERNAL_LINKS
        if (LYoverride_no_cache || !HText_hasNoCacheSet(text) ||
            !HText_AreDifferent(anchor, full_address))
#else
        if (LYoverride_no_cache ||
            ((LYinternal_flag || !HText_hasNoCacheSet(text)) &&
             strncmp(full_address, "LYNXIMGMAP:", 11)))
#endif /* DONT_TRACK_INTERNAL_LINKS */
        {
            CTRACE((tfp, "HTAccess: Document already in memory.\n"));
            HText_select(text);

#ifdef DIRED_SUPPORT
            if (HTAnchor_format(anchor) == WWW_DIRED)
                lynx_edit_mode = TRUE;
#endif
            redirection_attempts = 0;
            return YES;
        } else {
            /* The cached copy may not be re-used: reload it. */
            ForcingNoCache = YES;
            CTRACE((tfp, "HTAccess: Auto-reloading document.\n"));
        }
    }

    /*
    **  Get the document from the net.  If we are auto-reloading,
    **  the mutable anchor elements from the previous rendition
    **  should be freed in conjunction with loading of the new
    **  rendition. - FM
    */
    LYforce_no_cache = NO;  /* reset after each time through */
    if (ForcingNoCache) {
        FREE(anchor->title);
    }
    status = HTLoad(address_to_load, anchor, format_out, sink);
    CTRACE((tfp, "HTAccess:  status=%d\n", status));

    /*
    **  Log the access if necessary.
    **  One line per access: timestamp, client host (or "local"),
    **  "FAIL" for a negative status and "GET" otherwise, then the URL.
    */
    if (HTlogfile) {
        time_t theTime;
        time(&theTime);
        fprintf(HTlogfile, "%24.24s %s %s %s\n",
            ctime(&theTime),
            HTClientHost ? HTClientHost : "local",
            status < 0 ? "FAIL" : "GET",
            full_address);
        fflush(HTlogfile);      /* Actually update it on disk */
        CTRACE((tfp, "Log: %24.24s %s %s %s\n",
                ctime(&theTime),
                HTClientHost ? HTClientHost : "local",
                status < 0 ? "FAIL" : "GET",
                full_address));
    }

    /*
    **  Check out what we received from the net.
    */
    if (status == HT_REDIRECTING) {
        /*  Exported from HTMIME.c, of all places.  *//** NO!! - FM **/
        /*
        **  Doing this via HTMIME.c meant that the redirection cover
        **  page was already loaded before we learned that we want a
        **  different URL.  Also, changing anchor->address, as Lynx
        **  was doing, meant we could never again access its hash
        **  table entry, creating an insolvable memory leak.  Instead,
        **  if we had a 301 status and set permanent_redirection,
        **  we'll load the new URL in anchor->physical, preceded by a
        **  token, which we can check to make replacements on subsequent
        **  access attempts.  We'll check recursively, and retrieve the
        **  final URL if we had multiple redirections to it.  If we just
        **  went to HTLoad now, as Lou originally had this, we couldn't do
        **  Lynx's security checks and alternate handling of some URL types.
        **  So, instead, we'll go all the way back to the top of getfile
        **  in LYGetFile.c when the status is HT_REDIRECTING.  This may
        **  seem bizarre, but it works like a charm! - FM
        */
        CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
                address_to_load));
        CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", redirecting_url));
        /*
        **  Prevent circular references.
        */
        if (strcmp(address_to_load, redirecting_url)) { /* if different */
            /*
            **  Load token and redirecting url into anchor->physical
            **  if we had 301 Permanent redirection.  HTTP.c does not
            **  allow this if we have POST content. - FM
            */
            if (permanent_redirection) {
                StrAllocCopy(anchor->physical, "Location=");
                StrAllocCat(anchor->physical, redirecting_url);
            }

            /*
            **  Set up flags before return to getfile. - FM
            */
            StrAllocCopy(use_this_url_instead, redirecting_url);
            if (ForcingNoCache)
                LYforce_no_cache = YES;
            ++redirection_attempts;
            FREE(redirecting_url);
            permanent_redirection = FALSE;
            return(NO);
        }
        /*
        **  The redirection points back at the address just requested:
        **  count it, discard it, and return YES without following it.
        */
        ++redirection_attempts;
        FREE(redirecting_url);
        permanent_redirection = FALSE;
        return(YES);
    }

    /*
    **  We did not receive a redirecting URL. - FM
    */
    redirection_attempts = 0;
    FREE(redirecting_url);
    permanent_redirection = FALSE;

    if (status == HT_LOADED) {
        CTRACE((tfp, "HTAccess: `%s' has been accessed.\n",
                full_address));
        return YES;
    }
    if (status == HT_PARTIAL_CONTENT) {
        HTAlert(gettext("Loading incomplete."));
        CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n",
                full_address));
        return YES;
    }

    if (status == HT_NO_DATA) {
        CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n",
                full_address));
        return NO;
    }

    if (status == HT_NOT_LOADED) {
        CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n",
                full_address));
        return NO;
    }

    if (status == HT_INTERRUPTED) {
        CTRACE((tfp, "HTAccess: `%s' has been accessed, transfer interrupted.\n",
                full_address));
        return NO;
    }

    if (status > 0) {
        /*
        **  If you get this, then please find which routine is returning
        **  a positive unrecognized error code!
        **  This is treated as fatal: the process exits.
        */
        fprintf(stderr,
 gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n"));
        fprintf(stderr,
 gettext("**** HTAccess: Internal software error. Please mail lynx-dev@sig.net!\n"));
        fprintf(stderr,
 gettext("**** HTAccess: Status returned was: %d\n"),status);
        exit(-1);
    }

    /* Failure in accessing a document */
    cp = NULL;
    StrAllocCopy(cp, gettext("Can't Access"));
    StrAllocCat(cp, " `");
    StrAllocCat(cp, full_address);
    StrAllocCat(cp, "'");
    _HTProgress(cp);
    FREE(cp);

    CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address));
    HTLoadError(sink, 500, gettext("Unable to access document."));
    return NO;

} /* HTLoadDocument */

/*      Load a document from absolute name.             HTLoadAbsolute()
**      -----------------------------------
**
**  Looks up (or creates) the anchor for docaddr and loads it with the
**  global output format (WWW_PRESENT if HTOutputFormat is unset) into
**  HTOutputStream.
**
**  On Entry,
**        docaddr       Describes the document to be accessed; its
**                      address member is the absolute address.
**        (The "filter" entry of older documentation no longer
**        corresponds to a parameter.)
**
**  On Exit,
**        returns    YES     Success in opening document
**                   NO      Failure
*/
PUBLIC BOOL HTLoadAbsolute ARGS1(
        CONST DocAddress *,     docaddr)
{
    return HTLoadDocument(docaddr->address,
                          HTAnchor_parent(HTAnchor_findAddress(docaddr)),
                          (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
                          HTOutputStream);
}

#ifdef NOT_USED_CODE
/*      Load a document from absolute name to stream.   HTLoadToStream()
**      ---------------------------------------------
**
**  Compiled only when NOT_USED_CODE is defined.
**
**  On Entry,
**        addr          The absolute address of the document to be accessed.
**        filter        Not used by the body.
**        sink          if non-NULL, send data down this stream
**
**  On Exit,
**        returns    YES     Success in opening document
**                   NO      Failure
*/
PUBLIC BOOL HTLoadToStream ARGS3(
        CONST char *,   addr,
        BOOL,           filter,
        HTStream *,     sink)
{
    return HTLoadDocument(addr,
                          HTAnchor_parent(HTAnchor_findAddress(addr)),
                          (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
                          sink);
}
#endif /* NOT_USED_CODE */

/*      Load a document from relative name.             HTLoadRelative()
**      -----------------------------------
**
**  Resolves relative_name against the address of the anchor "here" and
**  loads the result through HTLoadAbsolute() as a plain request (no
**  POST data, no bookmark, not HEAD, not marked safe).
**
**  On Entry,
**        relative_name The relative address of the document
**                      to be accessed.
**        here          Anchor whose address is the base for resolution.
**
**  On Exit,
**        returns    YES     Success in opening document
**                   NO      Failure
*/
PUBLIC BOOL HTLoadRelative ARGS2(
        CONST char *,           relative_name,
        HTParentAnchor *,       here)
{
    DocAddress full_address;
    BOOL result;
    char * mycopy = NULL;       /* private copy handed to HTStrip() */
    char * stripped = NULL;     /* result of HTStrip(); not freed separately */
    char * current_address = HTAnchor_address((HTAnchor*)here);

    full_address.address = NULL;
    full_address.post_data = NULL;
    full_address.post_content_type = NULL;
    full_address.bookmark = NULL;
    full_address.isHEAD = FALSE;
    full_address.safe = FALSE;

    StrAllocCopy(mycopy, relative_name);

    stripped = HTStrip(mycopy);
    full_address.address =
                HTParse(stripped,
                        current_address,
                        PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
    result = HTLoadAbsolute(&full_address);
    /*
    **  If we got redirection, result will be NO, but use_this_url_instead
    **  will be set.  The calling routine should check both and do whatever
    **  is appropriate. - FM
    */
    FREE(full_address.address);
    FREE(current_address);
    FREE(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
    return result;
}

/*      Load if necessary, and select an anchor.        HTLoadAnchor()
**      ----------------------------------------
**
**  On Entry,
**        destination   The child or parent anchor to be loaded.
** ** On Exit, ** returns YES Success ** NO Failure */ PUBLIC BOOL HTLoadAnchor ARGS1( HTAnchor *, destination) { HTParentAnchor * parent; BOOL loaded = NO; if (!destination) return NO; /* No link */ parent = HTAnchor_parent(destination); if (HTAnchor_document(parent) == NULL) { /* If not already loaded */ /* TBL 921202 */ BOOL result; char * address = HTAnchor_address((HTAnchor*) parent); result = HTLoadDocument(address, parent, HTOutputFormat ? HTOutputFormat : WWW_PRESENT, HTOutputStream); FREE(address); if (!result) return NO; loaded = YES; } { HText *text = (HText*)HTAnchor_document(parent); if (destination != (HTAnchor *)parent) { /* If child anchor */ HText_selectAnchor(text, /* Double display? @@ */ (HTChildAnchor*)destination); } else { if (!loaded) HText_select(text); } } return YES; } /* HTLoadAnchor */ /* Search. HTSearch() ** ------- ** ** Performs a keyword search on word given by the user. Adds the ** keyword to the end of the current address and attempts to open ** the new address. ** ** On Entry, ** *keywords space-separated keyword list or similar search list ** here is anchor search is to be done on. */ PRIVATE char hex ARGS1( int, i) { char * hexchars = "0123456789ABCDEF"; return hexchars[i]; } PUBLIC BOOL HTSearch ARGS2( CONST char *, keywords, HTParentAnchor *, here) { #define acceptable \ "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_" char *q, *u; CONST char * p, *s, *e; /* Pointers into keywords */ char * address = NULL; BOOL result; char * escaped = (char *)calloc(1, ((strlen(keywords)*3) + 1)); static CONST BOOL isAcceptable[96] = /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? 
*/ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */ if (escaped == NULL) outofmem(__FILE__, "HTSearch"); StrAllocCopy(address, here->isIndexAction); /* ** Convert spaces to + and hex escape unacceptable characters. */ for (s = keywords; *s && WHITE(*s); s++) /* Scan */ ; /* Skip white space */ for (e = s + strlen(s); e > s && WHITE(*(e-1)); e--) /* Scan */ ; /* Skip trailers */ for (q = escaped, p = s; p < e; p++) { /* Scan stripped field */ unsigned char c = (unsigned char)TOASCII(*p); if (WHITE(*p)) { *q++ = '+'; } else if (HTCJK != NOCJK) { *q++ = *p; } else if (c>=32 && c<=(unsigned char)127 && isAcceptable[c-32]) { *q++ = *p; /* 930706 TBL for MVS bug */ } else { *q++ = '%'; *q++ = hex((int)(c >> 4)); *q++ = hex((int)(c & 15)); } } /* Loop over string */ *q = '\0'; /* Terminate escaped string */ u = strchr(address, '?'); /* Find old search string */ if (u != NULL) *u = '\0'; /* Chop old search off */ StrAllocCat(address, "?"); StrAllocCat(address, escaped); FREE(escaped); result = HTLoadRelative(address, here); FREE(address); /* ** If we got redirection, result will be NO, but use_this_url_instead ** will be set. The calling routine should check both and do whatever ** is appropriate. Only an http server (not a gopher or wais server) ** could return redirection. Lynx will go all the way back to its ** mainloop() and subject a redirecting URL to all of its security and ** restrictions checks. - FM */ return result; } /* Search Given Indexname. HTSearchAbsolute() ** ----------------------- ** ** Performs a keyword search on word given by the user. Adds the ** keyword to the end of the current address and attempts to open ** the new address. 
**
**      On Entry,
**      *keywords       space-separated keyword list or similar search list
**      *indexname      is name of object search is to be done on.
**
**      On Exit,
**      returns         whatever HTSearch() returns for the anchor that
**                      HTAnchor_findAddress() yields for indexname.
*/
PUBLIC BOOL HTSearchAbsolute ARGS2(
        CONST char *,   keywords,
        char *,         indexname)
{
    DocAddress abs_doc;
    HTParentAnchor * anchor;

    /* Plain request for indexname: no post data, bookmark or HEAD. */
    abs_doc.address = indexname;
    abs_doc.post_data = NULL;
    abs_doc.post_content_type = NULL;
    abs_doc.bookmark = NULL;
    abs_doc.isHEAD = FALSE;
    abs_doc.safe = FALSE;

    anchor = (HTParentAnchor*)HTAnchor_findAddress(&abs_doc);
    return HTSearch(keywords, anchor);
}

#ifdef NOT_USED_CODE
/*      Generate the anchor for the home page.          HTHomeAnchor()
**      --------------------------------------
**
**      As it involves file access, this should only be done once
**      when the program first runs.
**      This is a default algorithm -- browsers don't HAVE to use this.
**      But consistency between browsers is STRONGLY recommended!
**
**      Priority order is:
**              1       WWW_HOME environment variable (logical name, etc)
**              2       ~/WWW/default.html
**              3       /usr/local/bin/default.html
**              4       http://www.w3.org/default.html
**
**      In the code these are LOGICAL_DEFAULT, $HOME/PERSONAL_DEFAULT,
**      LOCAL_DEFAULT_FILE and LAST_RESORT respectively.  When
**      HTClientHost is set, the first line of the REMOTE_POINTER file,
**      or else REMOTE_ADDRESS, is used instead of the local files.
**
**      On Exit,
**      returns         the anchor found for the chosen address.
**
**      NOTE(review): this disabled routine hands a plain string to
**      HTAnchor_findAddress(), while the live callers in this file pass
**      a DocAddress pointer -- confirm the prototype before re-enabling.
*/
PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
{
    char * my_home_document = NULL;
    char * home = (char *)getenv(LOGICAL_DEFAULT);
    char * ref;
    HTParentAnchor * anchor;

    if (home) {
        StrAllocCopy(my_home_document, home);
#define MAX_FILE_NAME 1024                      /* @@@ */
    } else if (HTClientHost) {                  /* Telnet server */
        /*
        **  Someone telnets in, they get a special home.
        */
        FILE * fp = fopen(REMOTE_POINTER, "r");
        char * status;

        if (fp) {
            my_home_document = (char*)calloc(1, MAX_FILE_NAME);
            if (my_home_document == NULL)
                outofmem(__FILE__, "HTHomeAnchor");
            status = fgets(my_home_document, MAX_FILE_NAME, fp);
            if (!status) {
                FREE(my_home_document);
            } else {
                /*
                **  fgets() keeps the line terminator; drop it so that
                **  it does not become part of the address, and treat
                **  an empty line like a missing one.
                */
                my_home_document[strcspn(my_home_document, "\r\n")] = '\0';
                if (my_home_document[0] == '\0') {
                    FREE(my_home_document);
                }
            }
            fclose(fp);
        }
        if (my_home_document == NULL)
            StrAllocCopy(my_home_document, REMOTE_ADDRESS);
    }

#ifdef UNIX
    if (my_home_document == NULL) {
        FILE * fp = NULL;
        /* Named home_dir so it does not shadow the outer `home'. */
        CONST char * home_dir = (CONST char*)getenv("HOME");

        if (home_dir != NULL) {
            HTSprintf0(&my_home_document, "%s/%s", home_dir, PERSONAL_DEFAULT);
            fp = fopen(my_home_document, "r");
        }

        if (!fp) {
            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
            fp = fopen(my_home_document, "r");
        }
        if (fp) {
            /* The file was opened only to check that it is readable. */
            fclose(fp);
        } else {
            CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n",
                        PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE));
            FREE(my_home_document);
        }
    }
#endif /* UNIX */

    ref = HTParse((my_home_document ?
                   my_home_document : (HTClientHost ?
                                     REMOTE_ADDRESS : LAST_RESORT)),
                  "file:",
                  PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
    if (my_home_document) {
        CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n",
                    my_home_document, ref));
        FREE(my_home_document);
    }
    anchor = (HTParentAnchor*)HTAnchor_findAddress(ref);
    FREE(ref);
    return anchor;
}
#endif /* NOT_USED_CODE */