/*
* $LynxId: HTAccess.c,v 1.85 2019/08/24 00:27:06 tom Exp $
*
* Access Manager HTAccess.c
* ==============
*
* Authors
* TBL Tim Berners-Lee timbl@info.cern.ch
* JFG Jean-Francois Groff jfg@dxcern.cern.ch
* DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
* FM Foteos Macrides macrides@sci.wfeb.edu
* PDM Danny Mayer mayer@ljo.dec.com
*
* History
* 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
* 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
* 6 Oct 92 Moved HTClientHost and logfile into here. TBL
* 17 Dec 92 Tn3270 added, bug fix. DD
* 4 Feb 93 Access registration, Search escapes bad chars TBL
* PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
* 28 May 93 WAIS gateway explicit if no WAIS library linked in.
* 31 May 94 Added DIRECT_WAIS support for VMS. FM
* 27 Jan 95 Fixed proxy support to use NNTPSERVER for checking
* whether or not to use the proxy server. PDM
* 27 Jan 95 Ensured that proxy service will be overridden for files
* on the local host (because HTLoadFile() doesn't try ftp
* for those) and will substitute ftp for remote files. FM
* 28 Jan 95 Tweaked PDM's proxy override mods to handle port info
* for news and wais URL's. FM
*
* Bugs
* This module assumes that that the graphic object is hypertext, as it
* needs to select it when it has been loaded. A superclass needs to be
* defined which accepts select and select_anchor.
*/
#ifdef VMS
#define DIRECT_WAIS
#endif /* VMS */
#include <HTUtils.h>
#include <HTTP.h>
#include <HTAlert.h>
/*
* Implements:
*/
#include <HTAccess.h>
/*
* Uses:
*/
#include <HTParse.h>
#include <HTML.h> /* SCW */
#ifndef NO_RULES
#include <HTRules.h>
#endif
#include <HTList.h>
#include <HText.h> /* See bugs above */
#include <HTCJK.h>
#include <UCMap.h>
#include <GridText.h>
#include <LYGlobalDefs.h>
#include <LYexit.h>
#include <LYStrings.h>
#include <LYUtils.h>
#include <LYLeaks.h>
/*
* These flags may be set to modify the operation of this module
*/
char *HTClientHost = NULL; /* Name of remote login host if any */
FILE *HTlogfile = NULL; /* File to which to output one-liners */
BOOL HTSecure = NO; /* Disable access for telnet users? */
BOOL HTPermitRedir = NO; /* Always allow redirection in getfile()? */
BOOL using_proxy = NO; /* are we using a proxy gateway? */
/*
* To generate other things, play with these:
*/
HTFormat HTOutputFormat = NULL;
HTStream *HTOutputStream = NULL; /* For non-interactive, set this */
static HTList *protocols = NULL; /* List of registered protocol descriptors */
char *use_this_url_instead = NULL;
static int pushed_assume_LYhndl = -1; /* see LYUC* functions below - kw */
static char *pushed_assume_MIMEname = NULL;
#ifdef LY_FIND_LEAKS
static void free_protocols(void)
{
HTList_delete(protocols);
protocols = NULL;
FREE(pushed_assume_MIMEname); /* shouldn't happen, just in case - kw */
}
#endif /* LY_FIND_LEAKS */
/* Register a Protocol. HTRegisterProtocol()
* --------------------
*/
BOOL HTRegisterProtocol(HTProtocol * protocol)
{
if (!protocols) {
protocols = HTList_new();
#ifdef LY_FIND_LEAKS
atexit(free_protocols);
#endif
}
HTList_addObject(protocols, protocol);
return YES;
}
/* Register all known protocols. HTAccessInit()
* -----------------------------
*
* Add to or subtract from this list if you add or remove protocol
* modules. This routine is called the first time the protocol list
* is needed, unless any protocols are already registered, in which
* case it is not called. Therefore the application can override
* this list.
*
* Compiling with NO_INIT prevents all known protocols from being
* forced in at link time.
*/
#ifndef NO_INIT
#ifdef GLOBALREF_IS_MACRO
extern GLOBALREF (HTProtocol, HTTP);
extern GLOBALREF (HTProtocol, HTTPS);
extern GLOBALREF (HTProtocol, HTFile);
extern GLOBALREF (HTProtocol, HTTelnet);
extern GLOBALREF (HTProtocol, HTTn3270);
extern GLOBALREF (HTProtocol, HTRlogin);
#ifndef DECNET
#ifndef DISABLE_FTP
extern GLOBALREF (HTProtocol, HTFTP);
#endif /* DISABLE_FTP */
#ifndef DISABLE_NEWS
extern GLOBALREF (HTProtocol, HTNews);
extern GLOBALREF (HTProtocol, HTNNTP);
extern GLOBALREF (HTProtocol, HTNewsPost);
extern GLOBALREF (HTProtocol, HTNewsReply);
extern GLOBALREF (HTProtocol, HTSNews);
extern GLOBALREF (HTProtocol, HTSNewsPost);
extern GLOBALREF (HTProtocol, HTSNewsReply);
#endif /* not DISABLE_NEWS */
#ifndef DISABLE_GOPHER
extern GLOBALREF (HTProtocol, HTGopher);
extern GLOBALREF (HTProtocol, HTCSO);
#endif /* not DISABLE_GOPHER */
#ifndef DISABLE_FINGER
extern GLOBALREF (HTProtocol, HTFinger);
#endif /* not DISABLE_FINGER */
#ifdef DIRECT_WAIS
extern GLOBALREF (HTProtocol, HTWAIS);
#endif /* DIRECT_WAIS */
#endif /* !DECNET */
#else
GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin;
#ifndef DECNET
#ifndef DISABLE_FTP
GLOBALREF HTProtocol HTFTP;
#endif /* DISABLE_FTP */
#ifndef DISABLE_NEWS
GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply;
GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply;
#endif /* not DISABLE_NEWS */
#ifndef DISABLE_GOPHER
GLOBALREF HTProtocol HTGopher, HTCSO;
#endif /* not DISABLE_GOPHER */
#ifndef DISABLE_FINGER
GLOBALREF HTProtocol HTFinger;
#endif /* not DISABLE_FINGER */
#ifdef DIRECT_WAIS
GLOBALREF HTProtocol HTWAIS;
#endif /* DIRECT_WAIS */
#endif /* !DECNET */
#endif /* GLOBALREF_IS_MACRO */
static void HTAccessInit(void) /* Call me once */
{
HTRegisterProtocol(&HTTP);
HTRegisterProtocol(&HTTPS);
HTRegisterProtocol(&HTFile);
HTRegisterProtocol(&HTTelnet);
HTRegisterProtocol(&HTTn3270);
HTRegisterProtocol(&HTRlogin);
#ifndef DECNET
#ifndef DISABLE_FTP
HTRegisterProtocol(&HTFTP);
#endif /* DISABLE_FTP */
#ifndef DISABLE_NEWS
HTRegisterProtocol(&HTNews);
HTRegisterProtocol(&HTNNTP);
HTRegisterProtocol(&HTNewsPost);
HTRegisterProtocol(&HTNewsReply);
HTRegisterProtocol(&HTSNews);
HTRegisterProtocol(&HTSNewsPost);
HTRegisterProtocol(&HTSNewsReply);
#endif /* not DISABLE_NEWS */
#ifndef DISABLE_GOPHER
HTRegisterProtocol(&HTGopher);
HTRegisterProtocol(&HTCSO);
#endif /* not DISABLE_GOPHER */
#ifndef DISABLE_FINGER
HTRegisterProtocol(&HTFinger);
#endif /* not DISABLE_FINGER */
#ifdef DIRECT_WAIS
HTRegisterProtocol(&HTWAIS);
#endif /* DIRECT_WAIS */
#endif /* !DECNET */
LYRegisterLynxProtocols();
}
#endif /* !NO_INIT */
/* Check for proxy override. override_proxy()
* -------------------------
*
* Check the no_proxy environment variable to get the list
* of hosts for which proxy server is not consulted.
*
* no_proxy is a comma- or space-separated list of machine
* or domain names, with optional :port part. If no :port
* part is present, it applies to all ports on that domain.
*
* Example:
* no_proxy="cern.ch,some.domain:8001"
*
* Use "*" to override all proxy service:
* no_proxy="*"
*/
BOOL override_proxy(const char *addr)
{
const char *no_proxy = getenv("no_proxy");
char *p = NULL;
char *at = NULL;
char *host = NULL;
char *Host = NULL;
char *acc_method = NULL;
int port = 0;
int h_len = 0;
/*
* Check for global override.
*/
if (no_proxy) {
if (!strcmp(no_proxy, "*"))
return YES;
}
/*
* Never proxy file:// URLs if they are on the local host. HTLoadFile()
* will not attempt ftp for those if direct access fails. We'll check that
* first, in case no_proxy hasn't been defined. - FM
*/
if (!addr)
return NO;
if (!(host = HTParse(addr, "", PARSE_HOST)))
return NO;
if (!*host) {
FREE(host);
return NO;
}
Host = (((at = StrChr(host, '@')) != NULL) ? (at + 1) : host);
if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) {
if (!strcmp("file", acc_method) &&
(LYSameHostname(Host, "localhost") ||
LYSameHostname(Host, HTHostName()))) {
FREE(host);
FREE(acc_method);
return YES;
}
FREE(acc_method);
}
if (!no_proxy) {
FREE(host);
return NO;
}
if (NULL != (p = HTParsePort(Host, &port))) { /* Port specified */
*p = 0; /* Chop off port */
} else { /* Use default port */
acc_method = HTParse(addr, "", PARSE_ACCESS);
if (acc_method != NULL) {
/* *INDENT-OFF* */
if (!strcmp(acc_method, "http")) port = 80;
else if (!strcmp(acc_method, "https")) port = 443;
else if (!strcmp(acc_method, "ftp")) port = 21;
#ifndef DISABLE_GOPHER
else if (!strcmp(acc_method, "gopher")) port = 70;
#endif
else if (!strcmp(acc_method, "cso")) port = 105;
#ifndef DISABLE_NEWS
else if (!strcmp(acc_method, "news")) port = 119;
else if (!strcmp(acc_method, "nntp")) port = 119;
else if (!strcmp(acc_method, "newspost")) port = 119;
else if (!strcmp(acc_method, "newsreply")) port = 119;
else if (!strcmp(acc_method, "snews")) port = 563;
else if (!strcmp(acc_method, "snewspost")) port = 563;
else if (!strcmp(acc_method, "snewsreply")) port = 563;
#endif
else if (!strcmp(acc_method, "wais")) port = 210;
#ifndef DISABLE_FINGER
else if (!strcmp(acc_method, "finger")) port = 79;
#endif
else if (!strcmp(acc_method, "telnet")) port = 23;
else if (!strcmp(acc_method, "tn3270")) port = 23;
else if (!strcmp(acc_method, "rlogin")) port = 513;
/* *INDENT-ON* */
FREE(acc_method);
}
}
if (!port)
port = 80; /* Default */
h_len = (int) strlen(Host);
while (*no_proxy) {
const char *end;
const char *colon = NULL;
int templ_port = 0;
int t_len;
int brackets = 0;
while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ','))
no_proxy++; /* Skip whitespace and separators */
end = no_proxy;
while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
if (!brackets && (*end == ':'))
colon = end; /* Port number given */
else if (*end == '[')
++brackets;
else if (*end == ']')
--brackets;
end++;
}
if (colon) {
/* unlike HTParsePort(), this may be followed by non-digits */
templ_port = atoi(colon + 1);
t_len = (int) (colon - no_proxy);
} else {
t_len = (int) (end - no_proxy);
}
if ((!templ_port || templ_port == port) &&
(t_len > 0 && t_len <= h_len &&
!strncasecomp(Host + h_len - t_len, no_proxy, t_len))) {
FREE(host);
return YES;
}
#ifdef CJK_EX /* ASATAKU PROXY HACK */
if ((!templ_port || templ_port == port) &&
(t_len > 0 && t_len <= h_len &&
isdigit(UCH(*no_proxy)) &&
!StrNCmp(host, no_proxy, t_len))) {
FREE(host);
return YES;
}
#endif /* ASATAKU PROXY HACK */
if (*end)
no_proxy = (end + 1);
else
break;
}
FREE(host);
return NO;
}
/* Find physical name and access protocol get_physical()
* --------------------------------------
*
* On entry,
* addr must point to the fully qualified hypertext reference.
* anchor a parent anchor with whose address is addr
*
* On exit,
* returns HT_NO_ACCESS Error has occurred.
* HT_OK Success
*/
static int get_physical(const char *addr,
HTParentAnchor *anchor)
{
int result;
char *acc_method = NULL; /* Name of access method */
char *physical = NULL;
char *Server_addr = NULL;
BOOL override_flag = NO;
CTRACE((tfp, "get_physical %s\n", addr));
/*
* Make sure the using_proxy variable is FALSE.
*/
using_proxy = NO;
#ifndef NO_RULES
if ((physical = HTTranslate(addr)) == 0) {
if (redirecting_url) {
return HT_REDIRECTING;
}
return HT_FORBIDDEN;
}
if (anchor->isISMAPScript == TRUE) {
StrAllocCat(physical, "?0,0");
CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
}
if (!StrNCmp(physical, "Proxied=", 8)) {
HTAnchor_setPhysical(anchor, physical + 8);
using_proxy = YES;
} else if (!StrNCmp(physical, "NoProxy=", 8)) {
HTAnchor_setPhysical(anchor, physical + 8);
override_flag = YES;
} else {
HTAnchor_setPhysical(anchor, physical);
}
FREE(physical); /* free our copy */
#else
if (anchor->isISMAPScript == TRUE) {
StrAllocCopy(physical, addr);
StrAllocCat(physical, "?0,0");
CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
HTAnchor_setPhysical(anchor, physical);
FREE(physical); /* free our copy */
} else {
HTAnchor_setPhysical(anchor, addr);
}
#endif /* NO_RULES */
acc_method = HTParse(HTAnchor_physical(anchor), STR_FILE_URL, PARSE_ACCESS);
/*
* Check whether gateway access has been set up for this.
*
* This function can be replaced by the rule system above.
*
* If the rule system has already determined that we should use a proxy, or
* that we shouldn't, ignore proxy-related settings, don't use no_proxy
* either.
*/
#define USE_GATEWAYS
#ifdef USE_GATEWAYS
if (!override_flag && !using_proxy) { /* else ignore no_proxy env var */
char *host = NULL;
int port;
if (!strcasecomp(acc_method, "news")) {
/*
* News is different, so we need to check the name of the server,
* as well as the default port for selective exclusions.
*/
if ((host = HTParse(addr, "", PARSE_HOST))) {
if (HTParsePort(host, &port) == NULL) {
StrAllocCopy(Server_addr, "news://");
StrAllocCat(Server_addr, host);
StrAllocCat(Server_addr, ":119/");
}
FREE(host);
} else if (LYGetEnv("NNTPSERVER") != NULL) {
StrAllocCopy(Server_addr, "news://");
StrAllocCat(Server_addr, LYGetEnv("NNTPSERVER"));
StrAllocCat(Server_addr, ":119/");
}
} else if (!strcasecomp(acc_method, "wais")) {
/*
* Wais also needs checking of the default port for selective
* exclusions.
*/
if ((host = HTParse(addr, "", PARSE_HOST))) {
if (!(HTParsePort(host, &port))) {
StrAllocCopy(Server_addr, "wais://");
StrAllocCat(Server_addr, host);
StrAllocCat(Server_addr, ":210/");
}
FREE(host);
} else
StrAllocCopy(Server_addr, addr);
} else {
StrAllocCopy(Server_addr, addr);
}
override_flag = override_proxy(Server_addr);
}
if (!override_flag && !using_proxy) {
char *gateway_parameter = NULL, *gateway, *proxy;
/*
* Search for gateways.
*/
HTSprintf0(&gateway_parameter, "WWW_%s_GATEWAY", acc_method);
gateway = LYGetEnv(gateway_parameter); /* coerce for decstation */
/*
* Search for proxy servers.
*/
if (!strcmp(acc_method, "file"))
/*
* If we got to here, a file URL is for ftp on a remote host. - FM
*/
strcpy(gateway_parameter, "ftp_proxy");
else
sprintf(gateway_parameter, "%s_proxy", acc_method);
proxy = LYGetEnv(gateway_parameter);
FREE(gateway_parameter);
if (gateway)
CTRACE((tfp, "Gateway found: %s\n", gateway));
if (proxy)
CTRACE((tfp, "proxy server found: %s\n", proxy));
/*
* Proxy servers have precedence over gateway servers.
*/
if (proxy) {
char *gatewayed = NULL;
StrAllocCopy(gatewayed, proxy);
if (!StrNCmp(gatewayed, "http", 4)) {
char *cp = strrchr(gatewayed, '/');
/* Append a slash to the proxy specification if it doesn't
* end in one but otherwise looks normal (starts with "http",
* has no '/' other than ones before the hostname). - kw */
if (cp && (cp - gatewayed) <= 7)
LYAddHtmlSep(&gatewayed);
}
/*
* Ensure that the proxy server uses ftp for file URLs. - FM
*/
if (!StrNCmp(addr, "file", 4)) {
StrAllocCat(gatewayed, "ftp");
StrAllocCat(gatewayed, (addr + 4));
} else
StrAllocCat(gatewayed, addr);
using_proxy = YES;
if (anchor->isISMAPScript == TRUE)
StrAllocCat(gatewayed, "?0,0");
HTAnchor_setPhysical(anchor, gatewayed);
FREE(gatewayed);
FREE(acc_method);
acc_method = HTParse(HTAnchor_physical(anchor),
STR_HTTP_URL, PARSE_ACCESS);
} else if (gateway) {
char *path = HTParse(addr, "",
PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
/* Chop leading / off to make host into part of path */
char *gatewayed = HTParse(path + 1, gateway, PARSE_ALL);
FREE(path);
HTAnchor_setPhysical(anchor, gatewayed);
FREE(gatewayed);
FREE(acc_method);
acc_method = HTParse(HTAnchor_physical(anchor),
STR_HTTP_URL, PARSE_ACCESS);
}
}
FREE(Server_addr);
#endif /* use gateways */
/*
* Search registered protocols to find suitable one.
*/
result = HT_NO_ACCESS;
{
int i, n;
#ifndef NO_INIT
if (!protocols)
HTAccessInit();
#endif
n = HTList_count(protocols);
for (i = 0; i < n; i++) {
HTProtocol *p = (HTProtocol *) HTList_objectAt(protocols, i);
if (!strcmp(p->name, acc_method)) {
HTAnchor_setProtocol(anchor, p);
FREE(acc_method);
result = HT_OK;
break;
}
}
}
FREE(acc_method);
return result;
}
/*
* Temporarily set the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec
* used for charset "assuming" to the values implied by a HTParentAnchor's
* UCStages, after saving the current values for later restoration. - kw @@@
* These functions may not really belong here, but where else? I want the
* "pop" to occur as soon as possible after loading has finished. - kw @@@
*/
void LYUCPushAssumed(HTParentAnchor *anchor)
{
int anchor_LYhndl = -1;
LYUCcharset *anchor_UCI = NULL;
if (anchor) {
anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
if (anchor_LYhndl >= 0)
anchor_UCI = HTAnchor_getUCInfoStage(anchor,
UCT_STAGE_PARSER);
if (anchor_UCI && anchor_UCI->MIMEname) {
pushed_assume_MIMEname = UCAssume_MIMEcharset;
UCAssume_MIMEcharset = NULL;
if (HTCJK == JAPANESE)
StrAllocCopy(UCAssume_MIMEcharset, pushed_assume_MIMEname);
else
StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname);
pushed_assume_LYhndl = anchor_LYhndl;
/* some diagnostics */
if (UCLYhndl_for_unspec != anchor_LYhndl)
CTRACE((tfp,
"LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
UCLYhndl_for_unspec,
anchor_LYhndl));
UCLYhndl_for_unspec = anchor_LYhndl;
return;
}
}
pushed_assume_LYhndl = -1;
FREE(pushed_assume_MIMEname);
}
/*
* Restore the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec used for
* charset "assuming" from the values saved by LYUCPushAssumed, if any. - kw
*/
int LYUCPopAssumed(void)
{
if (pushed_assume_LYhndl >= 0) {
/* some diagnostics */
if (UCLYhndl_for_unspec != pushed_assume_LYhndl)
CTRACE((tfp,
"LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
UCLYhndl_for_unspec,
pushed_assume_LYhndl));
UCLYhndl_for_unspec = pushed_assume_LYhndl;
pushed_assume_LYhndl = -1;
FREE(UCAssume_MIMEcharset);
UCAssume_MIMEcharset = pushed_assume_MIMEname;
pushed_assume_MIMEname = NULL;
return UCLYhndl_for_unspec;
}
return -1;
}
/* Load a document HTLoad()
* ---------------
*
* This is an internal routine, which has an address AND a matching
* anchor. (The public routines are called with one OR the other.)
*
* On entry,
* addr must point to the fully qualified hypertext reference.
* anchor a parent anchor with whose address is addr
*
* On exit,
* returns <0 Error has occurred.
* HT_LOADED Success
* HT_NO_DATA Success, but no document loaded.
* (telnet session started etc)
*/
static int HTLoad(const char *addr,
HTParentAnchor *anchor,
HTFormat format_out,
HTStream *sink)
{
HTProtocol *p;
int status = get_physical(addr, anchor);
if (reloading) {
FREE(anchor->charset);
FREE(anchor->UCStages);
}
if (status == HT_FORBIDDEN) {
/* prevent crash if telnet or similar was forbidden by rule. - kw */
LYFixCursesOn("show alert:");
status = HTLoadError(sink, 500, gettext("Access forbidden by rule"));
} else if (status == HT_REDIRECTING) {
; /* fake redirection by rule, to redirecting_url */
} else if (status >= 0) {
/* prevent crash if telnet or similar mapped or proxied by rule. - kw */
LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor));
p = (HTProtocol *) HTAnchor_protocol(anchor);
anchor->parent->underway = TRUE; /* Hack to deal with caching */
status = p->load(HTAnchor_physical(anchor),
anchor, format_out, sink);
anchor->parent->underway = FALSE;
LYUCPopAssumed();
}
return status;
}
/* Get a save stream for a document HTSaveStream()
* --------------------------------
*/
HTStream *HTSaveStream(HTParentAnchor *anchor)
{
HTProtocol *p = (HTProtocol *) HTAnchor_protocol(anchor);
if (!p)
return NULL;
return p->saveStream(anchor);
}
int redirection_limit = 10;
int redirection_attempts = 0; /* counter in HTLoadDocument */
static BOOL too_many_redirections(void)
{
if (redirection_attempts > redirection_limit) {
char *msg = NULL;
HTSprintf0(&msg, TOO_MANY_REDIRECTIONS, redirection_limit);
free(msg);
redirection_attempts = 0;
return TRUE;
}
return FALSE;
}
/* Load a document - with logging etc HTLoadDocument()
* ----------------------------------
*
* - Checks or documents already loaded
* - Logs the access
* - Allows stdin filter option
* - Trace output and error messages
*
* On Entry,
* anchor is the node_anchor for the document
* full_address The address of the document to be accessed.
* filter if YES, treat stdin as HTML
*
* On Exit,
* returns YES Success in opening document
* NO Failure
*/
static BOOL HTLoadDocument(const char *full_address, /* may include #fragment */
HTParentAnchor *anchor,
HTFormat format_out,
HTStream *sink)
{
int status;
HText *text;
const char *address_to_load = full_address;
char *cp;
BOOL ForcingNoCache = LYforce_no_cache;
CTRACE((tfp, "HTAccess: loading document %s\n", NonNull(address_to_load)));
if (isEmpty(address_to_load))
return NO;
/*
* Free use_this_url_instead and reset permanent_redirection if not done
* elsewhere. - FM
*/
FREE(use_this_url_instead);
permanent_redirection = FALSE;
if (too_many_redirections()) {
return NO;
}
/*
* If this is marked as an internal link but we don't have the document
* loaded any more, and we haven't explicitly flagged that we want to
* reload with LYforce_no_cache, then something has disappeared from the
* cache when we expected it to be still there. The user probably doesn't
* expect a new network access. So if we have POST data and safe is not
* set in the anchor, ask for confirmation, and fail if not granted. The
* exception are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap
* for prompting if necessary. - kw
*/
text = (HText *) HTAnchor_document(anchor);
if (LYinternal_flag && !text && !LYforce_no_cache &&
anchor->post_data && !anchor->safe &&
!isLYNXIMGMAP(full_address) &&
HTConfirm(gettext("Document with POST content not found in cache. Resubmit?"))
!= TRUE) {
return NO;
}
/*
* If we don't have POST content, check whether this is a previous
* redirecting URL, and keep re-checking until we get to the final
* destination or redirection limit. If we do have POST content, we didn't
* allow permanent redirection, and an interactive user will be deciding
* whether to keep redirecting. - FM
*/
if (!anchor->post_data) {
while ((cp = HTAnchor_physical(anchor)) != NULL &&
!StrNCmp(cp, "Location=", 9)) {
DocAddress NewDoc;
CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
anchor->address));
CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp + 9));
/*
* Don't exceed the redirection_attempts limit. - FM
*/
++redirection_attempts;
if (too_many_redirections()) {
FREE(use_this_url_instead);
return NO;
}
/*
* Set up the redirection. - FM
*/
StrAllocCopy(use_this_url_instead, cp + 9);
NewDoc.address = use_this_url_instead;
NewDoc.post_data = NULL;
NewDoc.post_content_type = NULL;
NewDoc.bookmark = anchor->bookmark;
NewDoc.isHEAD = anchor->isHEAD;
NewDoc.safe = anchor->safe;
anchor = HTAnchor_findAddress(&NewDoc);
}
}
/*
* If we had previous redirection, go back and check out that the URL under
* the current restrictions. - FM
*/
if (use_this_url_instead) {
FREE(redirecting_url);
return (NO);
}
/*
* See if we can use an already loaded document.
*/
text = (HText *) HTAnchor_document(anchor);
if (text && !LYforce_no_cache) {
/*
* We have a cached rendition of the target document. Check if it's OK
* to re-use it. We consider it OK if:
* (1) the anchor does not have the no_cache element set, or
* (2) we've overridden it, e.g., because we are acting on a PREV_DOC
* command or a link in the History Page and it's not a reply from a
* POST with the LYresubmit_posts flag set, or
* (3) we are repositioning within the currently loaded document based
* on the target anchor's address (URL_Reference).
*
* If track_internal_links is false, HText_AreDifferent() is
* used to determine whether (3) applies. If the target address
* differs from that of the current document only by a fragment and the
* target address has an appended fragment, repositioning without
* reloading is always assumed. Note that HText_AreDifferent()
* currently always returns TRUE if the target has a LYNXIMGMAP URL, so
* that an internally generated pseudo-document will normally not be
* re-used unless condition (2) applies. (Condition (1) cannot apply
* since in LYMap.c, no_cache is always set in the anchor object).
* This doesn't guarantee that the resource from which the MAP element
* is taken will be read again (reloaded) when the list of links for a
* client-side image map is regenerated, when in some cases it should
* (e.g., user requested RELOAD, or HTTP response with no-cache header
* and we are not overriding).
*
* If track_internal_links is true, a target address that
* points to the same URL as the current document may still result in
* reloading, depending on whether the original URL-Reference was given
* as an internal link in the context of the previously loaded
* document. HText_AreDifferent() is not used here for testing whether
* we are just repositioning. For an internal link, the potential
* callers of this function from mainloop() down will either avoid
* making the call (and do the repositioning differently) or set
* LYinternal_flag (or LYoverride_no_cache). Note that (a) LYNXIMGMAP
* pseudo-documents and (b) The "List Page" document are treated
* logically as being part of the document on which they are based, for
* the purpose of whether to treat a link as internal, but the logic
* for this (by setting LYinternal_flag as necessary) is implemented
* elsewhere. There is a specific test for LYNXIMGMAP here so that the
* generated pseudo-document will not be re-used unless
* LYoverride_no_cache is set. The same caveat as above applies w.r.t.
* reloading of the underlying resource.
*
* We also should be checking other aspects of cache regulation (e.g.,
* based on an If-Modified-Since check, etc.) but the code for doing
* those other things isn't available yet.
*/
if ((reloading != REAL_RELOAD) &&
(LYoverride_no_cache ||
((!track_internal_links &&
(!HText_hasNoCacheSet(text) ||
!HText_AreDifferent(anchor, full_address))) ||
(track_internal_links &&
(((LYinternal_flag || !HText_hasNoCacheSet(text)) &&
!isLYNXIMGMAP(full_address))))))) {
CTRACE((tfp, "HTAccess: Document already in memory.\n"));
HText_select(text);
#ifdef DIRED_SUPPORT
if (HTAnchor_format(anchor) == WWW_DIRED)
lynx_edit_mode = TRUE;
#endif
redirection_attempts = 0;
return YES;
} else {
ForcingNoCache = YES;
BStrFree(anchor->post_data);
CTRACE((tfp, "HTAccess: Auto-reloading document.\n"));
}
}
if (HText_HaveUserChangedForms(text)) {
/*
* Issue a warning. User forms content will be lost.
* Will not restore changed forms, currently.
*/
HTAlert(RELOADING_FORM);
}
/*
* Get the document from the net. If we are auto-reloading, the mutable
* anchor elements from the previous rendition should be freed in
* conjunction with loading of the new rendition. - FM
*/
LYforce_no_cache = NO; /* reset after each time through */
if (ForcingNoCache) {
FREE(anchor->title); /* ??? */
}
status = HTLoad(address_to_load, anchor, format_out, sink);
CTRACE((tfp, "HTAccess: status=%d\n", status));
/*
* RECOVERY: if the loading failed, and we had a cached HText copy, and no
* new HText created - use a previous copy, issue a warning.
*/
if (text && status < 0 && (HText *) HTAnchor_document(anchor) == text) {
HTAlert(gettext("Loading failed, use a previous copy."));
CTRACE((tfp, "HTAccess: Loading failed, use a previous copy.\n"));
HText_select(text);
#ifdef DIRED_SUPPORT
if (HTAnchor_format(anchor) == WWW_DIRED)
lynx_edit_mode = TRUE;
#endif
redirection_attempts = 0;
return YES;
}
/*
* Log the access if necessary.
*/
if (HTlogfile) {
time_t theTime;
time(&theTime);
fprintf(HTlogfile, "%24.24s %s %s %s\n",
ctime(&theTime),
HTClientHost ? HTClientHost : "local",
status < 0 ? "FAIL" : "GET",
full_address);
fflush(HTlogfile); /* Actually update it on disk */
CTRACE((tfp, "Log: %24.24s %s %s %s\n",
ctime(&theTime),
HTClientHost ? HTClientHost : "local",
status < 0 ? "FAIL" : "GET",
full_address));
}
/*
* Check out what we received from the net.
*/
if (status == HT_REDIRECTING) {
/* Exported from HTMIME.c, of all places. */
/* NO!! - FM */
/*
* Doing this via HTMIME.c meant that the redirection cover page was
* already loaded before we learned that we want a different URL.
* Also, changing anchor->address, as Lynx was doing, meant we could
* never again access its hash table entry, creating an insolvable
* memory leak. Instead, if we had a 301 status and set
* permanent_redirection, we'll load the new URL in anchor->physical,
* preceded by a token, which we can check to make replacements on
* subsequent access attempts. We'll check recursively, and retrieve
* the final URL if we had multiple redirections to it. If we just
* went to HTLoad now, as Lou originally had this, we couldn't do
* Lynx's security checks and alternate handling of some URL types.
* So, instead, we'll go all the way back to the top of getfile in
* LYGetFile.c when the status is HT_REDIRECTING. This may seem
* bizarre, but it works like a charm! - FM
*
* Actually, the location header for redirections is now again picked
* up in HTMIME.c. But that's an internal matter between HTTP.c and
* HTMIME.c, is still under control of HTLoadHTTP for http URLs, is
* done in a way that doesn't load the redirection response's body
* (except when wanted as an error fallback), and thus need not concern
* us here. - kw 1999-12-02
*/
CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
address_to_load));
CTRACE((tfp, "HTAccess: Redirecting to '%s'\n",
redirecting_url));
/*
* Prevent circular references.
*/
if (strcmp(address_to_load, redirecting_url)) { /* if different */
/*
* Load token and redirecting url into anchor->physical if we had
* 301 Permanent redirection. HTTP.c does not allow this if we
* have POST content. - FM
*/
if (permanent_redirection) {
StrAllocCopy(anchor->physical, "Location=");
StrAllocCat(anchor->physical, redirecting_url);
}
/*
* Set up flags before return to getfile. - FM
*/
StrAllocCopy(use_this_url_instead, redirecting_url);
if (ForcingNoCache)
LYforce_no_cache = YES;
++redirection_attempts;
FREE(redirecting_url);
permanent_redirection = FALSE;
return (NO);
}
++redirection_attempts;
FREE(redirecting_url);
permanent_redirection = FALSE;
return (YES);
}
/*
* We did not receive a redirecting URL. - FM
*/
redirection_attempts = 0;
FREE(redirecting_url);
permanent_redirection = FALSE;
if (status == HT_LOADED) {
CTRACE((tfp, "HTAccess: `%s' has been accessed.\n",
full_address));
return YES;
}
if (status == HT_PARTIAL_CONTENT) {
HTAlert(gettext("Loading incomplete."));
CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n",
full_address));
return YES;
}
if (status == HT_NO_DATA) {
CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n",
full_address));
return NO;
}
if (status == HT_NOT_LOADED) {
CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n",
full_address));
return NO;
}
if (status == HT_INTERRUPTED) {
CTRACE((tfp,
"HTAccess: `%s' has been accessed, transfer interrupted.\n",
full_address));
return NO;
}
if (status > 0) {
/*
* If you get this, then please find which routine is returning a
* positive unrecognized error code!
*/
fprintf(stderr,
gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n"));
fprintf(stderr,
gettext("**** HTAccess: Internal software error. Please mail lynx-dev@nongnu.org!\n"));
fprintf(stderr, gettext("**** HTAccess: Status returned was: %d\n"), status);
exit_immediately(EXIT_FAILURE);
}
/* Failure in accessing a document */
cp = NULL;
StrAllocCopy(cp, gettext("Can't Access"));
StrAllocCat(cp, " `");
StrAllocCat(cp, full_address);
StrAllocCat(cp, "'");
_HTProgress(cp);
FREE(cp);
CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address));
HTLoadError(sink, 500, gettext("Unable to access document."));
return NO;
} /* HTLoadDocument */
/* Load a document from absolute name. HTLoadAbsolute()
* -----------------------------------
*
* On Entry,
* addr The absolute address of the document to be accessed.
* filter if YES, treat document as HTML
*
* On Exit,
* returns YES Success in opening document
* NO Failure
*/
BOOL HTLoadAbsolute(const DocAddress *docaddr)
{
BOOL result;
HTParentAnchor *anchor = HTAnchor_findAddress(docaddr);
result = HTLoadDocument(docaddr->address,
anchor,
(HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
HTOutputStream);
if (!result) {
HTAnchor_delete(anchor->parent);
}
return result;
}
#ifdef NOT_USED_CODE
/* Load a document from absolute name to stream. HTLoadToStream()
* ---------------------------------------------
*
* On Entry,
* addr The absolute address of the document to be accessed.
* sink if non-NULL, send data down this stream
*
* On Exit,
* returns YES Success in opening document
* NO Failure
*/
BOOL HTLoadToStream(const char *addr,
BOOL filter,
HTStream *sink)
{
return HTLoadDocument(addr,
HTAnchor_findSimpleAddress(addr),
(HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
sink);
}
#endif /* NOT_USED_CODE */
/* Load a document from relative name. HTLoadRelative()
* -----------------------------------
*
* On Entry,
* relative_name The relative address of the document
* to be accessed.
*
* On Exit,
* returns YES Success in opening document
* NO Failure
*/
BOOL HTLoadRelative(const char *relative_name,
HTParentAnchor *here)
{
DocAddress full_address;
BOOL result;
char *mycopy = NULL;
char *stripped = NULL;
full_address.address = NULL;
full_address.post_data = NULL;
full_address.post_content_type = NULL;
full_address.bookmark = NULL;
full_address.isHEAD = FALSE;
full_address.safe = FALSE;
StrAllocCopy(mycopy, relative_name);
stripped = HTStrip(mycopy);
full_address.address =
HTParse(stripped,
here->address,
PARSE_ALL_WITHOUT_ANCHOR);
result = HTLoadAbsolute(&full_address);
/*
* If we got redirection, result will be NO, but use_this_url_instead will
* be set. The calling routine should check both and do whatever is
* appropriate. - FM
*/
FREE(full_address.address);
FREE(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
return result;
}
/* Load if necessary, and select an anchor. HTLoadAnchor()
* ----------------------------------------
*
* On Entry,
* destination The child or parent anchor to be loaded.
*
* On Exit,
* returns YES Success
* NO Failure
*/
BOOL HTLoadAnchor(HTAnchor * destination)
{
HTParentAnchor *parent;
BOOL loaded = NO;
if (!destination)
return NO; /* No link */
parent = HTAnchor_parent(destination);
if (HTAnchor_document(parent) == NULL) { /* If not already loaded */
/* TBL 921202 */
BOOL result;
result = HTLoadDocument(parent->address,
parent,
HTOutputFormat ?
HTOutputFormat : WWW_PRESENT,
HTOutputStream);
if (!result)
return NO;
loaded = YES;
} {
HText *text = (HText *) HTAnchor_document(parent);
if ((destination != (HTAnchor *) parent) &&
(destination != (HTAnchor *) (parent->parent))) {
/* If child anchor */
HText_selectAnchor(text, /* Double display? @@ */
(HTChildAnchor *) destination);
} else {
if (!loaded)
HText_select(text);
}
}
return YES;
} /* HTLoadAnchor */
/* Search. HTSearch()
* -------
*
* Performs a keyword search on word given by the user. Adds the
* keyword to the end of the current address and attempts to open
* the new address.
*
* On Entry,
* *keywords space-separated keyword list or similar search list
* here is anchor search is to be done on.
*/
static char hex(int i)
{
const char *hexchars = "0123456789ABCDEF";
return hexchars[i];
}
BOOL HTSearch(const char *keywords,
HTParentAnchor *here)
{
#define acceptable \
"1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
char *q, *u;
const char *p, *s, *e; /* Pointers into keywords */
char *address = NULL;
BOOL result;
char *escaped = typecallocn(char, (strlen(keywords) * 3) + 1);
static const BOOL isAcceptable[96] =
/* *INDENT-OFF* */
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
{ 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
/* *INDENT-ON* */
if (escaped == NULL)
outofmem(__FILE__, "HTSearch");
if (here->isIndexAction == NULL) {
free(escaped);
return FALSE;
}
StrAllocCopy(address, here->isIndexAction);
/*
* Convert spaces to + and hex escape unacceptable characters.
*/
for (s = keywords; *s && WHITE(*s); s++) /* Scan */
; /* Skip white space */
for (e = s + strlen(s); e > s && WHITE(*(e - 1)); e--) /* Scan */
; /* Skip trailers */
for (q = escaped, p = s; p < e; p++) { /* Scan stripped field */
unsigned char c = UCH(TOASCII(*p));
if (WHITE(*p)) {
*q++ = '+';
} else if (IS_CJK_TTY) {
*q++ = *p;
} else if (c >= 32 && c <= UCH(127) && isAcceptable[c - 32]) {
*q++ = *p; /* 930706 TBL for MVS bug */
} else {
*q++ = '%';
*q++ = hex((int) (c >> 4));
*q++ = hex((int) (c & 15));
}
} /* Loop over string */
*q = '\0'; /* Terminate escaped string */
u = StrChr(address, '?'); /* Find old search string */
if (u != NULL)
*u = '\0'; /* Chop old search off */
StrAllocCat(address, "?");
StrAllocCat(address, escaped);
FREE(escaped);
result = HTLoadRelative(address, here);
FREE(address);
/*
* If we got redirection, result will be NO, but use_this_url_instead will
* be set. The calling routine should check both and do whatever is
* appropriate. Only an http server (not a gopher or wais server) could
* return redirection. Lynx will go all the way back to its mainloop() and
* subject a redirecting URL to all of its security and restrictions
* checks. - FM
*/
return result;
}
/* Search Given Indexname. HTSearchAbsolute()
* -----------------------
*
* Performs a keyword search on word given by the user. Adds the
* keyword to the end of the current address and attempts to open
* the new address.
*
* On Entry,
* *keywords space-separated keyword list or similar search list
* *indexname is name of object search is to be done on.
*/
BOOL HTSearchAbsolute(const char *keywords,
char *indexname)
{
DocAddress abs_doc;
HTParentAnchor *anchor;
abs_doc.address = indexname;
abs_doc.post_data = NULL;
abs_doc.post_content_type = NULL;
abs_doc.bookmark = NULL;
abs_doc.isHEAD = FALSE;
abs_doc.safe = FALSE;
anchor = HTAnchor_findAddress(&abs_doc);
return HTSearch(keywords, anchor);
}
#ifdef NOT_USED_CODE
/* Generate the anchor for the home page. HTHomeAnchor()
* --------------------------------------
*
* As it involves file access, this should only be done once
* when the program first runs.
* This is a default algorithm -- browser don't HAVE to use this.
* But consistency between browsers is STRONGLY recommended!
*
* Priority order is:
* 1 WWW_HOME environment variable (logical name, etc)
* 2 ~/WWW/default.html
* 3 /usr/local/bin/default.html
* 4 http://www.w3.org/default.html
*/
HTParentAnchor *HTHomeAnchor(void)
{
char *my_home_document = NULL;
char *home = LYGetEnv(LOGICAL_DEFAULT);
char *ref;
HTParentAnchor *anchor;
if (home) {
StrAllocCopy(my_home_document, home);
#define MAX_FILE_NAME 1024 /* @@@ */
} else if (HTClientHost) { /* Telnet server */
/*
* Someone telnets in, they get a special home.
*/
FILE *fp = fopen(REMOTE_POINTER, "r");
char *status;
if (fp) {
my_home_document = typecallocn(char, MAX_FILE_NAME);
if (my_home_document == NULL)
outofmem(__FILE__, "HTHomeAnchor");
status = fgets(my_home_document, MAX_FILE_NAME, fp);
if (!status) {
FREE(my_home_document);
}
fclose(fp);
}
if (my_home_document == NULL)
StrAllocCopy(my_home_document, REMOTE_ADDRESS);
}
#ifdef UNIX
if (my_home_document == NULL) {
FILE *fp = NULL;
char *home = LYGetEnv("HOME");
if (home != 0) {
HTSprintf0(&my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
fp = fopen(my_home_document, "r");
}
if (!fp) {
StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
fp = fopen(my_home_document, "r");
}
if (fp) {
fclose(fp);
} else {
CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n",
PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE));
FREE(my_home_document);
}
}
#endif /* UNIX */
ref = HTParse((my_home_document ?
my_home_document : (HTClientHost ?
REMOTE_ADDRESS : LAST_RESORT)),
STR_FILE_URL,
PARSE_ALL_WITHOUT_ANCHOR);
if (my_home_document) {
CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n",
my_home_document, ref));
FREE(my_home_document);
}
anchor = HTAnchor_findSimpleAddress(ref);
FREE(ref);
return anchor;
}
#endif /* NOT_USED_CODE */