diff options
Diffstat (limited to 'WWW/Library/Implementation')
98 files changed, 73893 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/HTAABrow.c b/WWW/Library/Implementation/HTAABrow.c new file mode 100644 index 00000000..3e25deaf --- /dev/null +++ b/WWW/Library/Implementation/HTAABrow.c @@ -0,0 +1,1284 @@ +/* + * $LynxId: HTAABrow.c,v 1.34 2010/09/24 08:27:42 tom Exp $ + * + * MODULE HTAABrow.c + * BROWSER SIDE ACCESS AUTHORIZATION MODULE + * + * Contains the code for keeping track of server hostnames, + * port numbers, scheme names, usernames, passwords + * (and servers' public keys). + * + * IMPORTANT: + * Routines in this module use dynamic allocation, but free + * automatically all the memory reserved by them. + * + * Therefore the caller never has to (and never should) + * free() any object returned by these functions. + * + * Therefore also all the strings returned by this package + * are only valid until the next call to the same function + * is made. This approach is selected, because of the nature + * of access authorization: no string returned by the package + * needs to be valid longer than until the next call. + * + * This also makes it easy to plug the AA package in: + * you don't have to ponder whether to free() something + * here or whether it is done somewhere else (because it is always + * done somewhere else). + * + * The strings that the package needs to store are copied + * so the original strings given as parameters to AA + * functions may be freed or modified with no side effects. + * + * The AA package does not free() anything other than what + * it has itself allocated. + * + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * + * HISTORY: + * Oct 17 AL Made corrections suggested by marca: + * Added if (!realm->username) return NULL; + * Changed some ""s to NULLs. + * Now doing calloc() to init uuencode source; + * otherwise HTUU_encode() reads uninitialized memory + * every now and then (not a real bug but not pretty). + * Corrected the formula for uuencode destination size. + * + * 28 Apr 1997 AJL Do Proxy Authorisation. 
+ * + * BUGS: + * + * + */ + +#include <HTUtils.h> +#include <HTString.h> +#include <HTParse.h> /* URL parsing function */ +#include <HTList.h> /* HTList object */ +#include <HTAlert.h> /* HTConfirm(), HTPrompt() */ +#include <HTAAUtil.h> /* AA common to both sides */ +#include <HTAssoc.h> /* Assoc list */ +#include <HTAccess.h> /* Are we using an HTTP gateway? */ +#include <HTAABrow.h> /* Implemented here */ +#include <HTUU.h> /* Uuencoding and uudecoding */ + +#include <LYLeaks.h> + +/* + * Local datatype definitions + * + * HTAAServer contains all the information about one server. + */ +typedef struct { + + char *hostname; /* Host's name */ + int portnumber; /* Port number */ + BOOL IsProxy; /* Is it a proxy? */ + HTList *setups; /* List of protection setups + on this server; i.e., valid + authentication schemes and + templates when to use them. + This is actually a list of + HTAASetup objects. */ + HTList *realms; /* Information about passwords */ +} HTAAServer; + +/* + * HTAASetup contains information about one server's one + * protected tree of documents. + */ +typedef struct { + HTAAServer *server; /* Which server serves this tree */ + char *ctemplate; /* Template for this tree */ + HTList *valid_schemes; /* Valid authentic.schemes */ + HTAssocList **scheme_specifics; /* Scheme specific params */ + BOOL retry; /* Failed last time -- reprompt (or whatever) */ +} HTAASetup; + +/* + * Information about usernames and passwords in + * Basic and Pubkey authentication schemes; + */ +typedef struct { + char *realmname; /* Password domain name */ + char *username; /* Username in that domain */ + char *password; /* Corresponding password */ +} HTAARealm; + +/* + * To free off all globals. 
- FM + */ +static void free_HTAAGlobals(void); +static BOOL free_HTAAGlobalsSet = FALSE; +static char *HTAA_composeAuthResult = NULL; +static char *compose_auth_stringResult = NULL; /* Uuencoded presentation */ + +/* + * Module-wide global variables + */ +static HTList *server_table = NULL; /* Browser's info about servers */ +static char *secret_key = NULL; /* Browser's latest secret key */ +static HTAASetup *current_setup = NULL; /* The server setup we are currently */ + + /* talking to */ +static char *current_hostname = NULL; /* The server's name and portnumber */ +static int current_portnumber = 80; /* where we are currently trying to */ + + /* connect. */ +static char *current_docname = NULL; /* The document's name we are */ + + /* trying to access. */ +static char *HTAAForwardAuth = NULL; /* Authorization: line to forward */ + + /* (used by gateway httpds) */ +static HTAASetup *proxy_setup = NULL; /* Same as above, but for Proxy -AJL */ +static char *proxy_hostname = NULL; +static char *proxy_docname = NULL; +static int proxy_portnumber = 80; + +/*** HTAAForwardAuth for enabling gateway-httpds to forward Authorization ***/ + +void HTAAForwardAuth_set(const char *scheme_name, + const char *scheme_specifics) +{ + size_t len = (20 + + (scheme_name ? strlen(scheme_name) : 0) + + (scheme_specifics ? 
strlen(scheme_specifics) : 0)); + + FREE(HTAAForwardAuth); + if ((HTAAForwardAuth = typecallocn(char, len)) == 0) + outofmem(__FILE__, "HTAAForwardAuth_set"); + + assert(HTAAForwardAuth != 0); + + strcpy(HTAAForwardAuth, "Authorization: "); + if (scheme_name) { + strcat(HTAAForwardAuth, scheme_name); + strcat(HTAAForwardAuth, " "); + if (scheme_specifics) { + strcat(HTAAForwardAuth, scheme_specifics); + } + } +} + +void HTAAForwardAuth_reset(void) +{ + FREE(HTAAForwardAuth); +} + +/**************************** HTAAServer ***********************************/ + +static void HTAASetup_delete(HTAASetup * killme); /* Forward */ + +/* static HTAAServer_new() + * ALLOCATE A NEW NODE TO HOLD SERVER INFO + * AND ADD IT TO THE LIST OF SERVERS + * ON ENTRY: + * hostname is the name of the host that the server + * is running in. + * portnumber is the portnumber which the server listens. + * IsProxy should be TRUE if this is a proxy. + * + * ON EXIT: + * returns the newly-allocated node with all the strings + * duplicated. + * Strings will be automatically freed by + * the function HTAAServer_delete(), which also + * frees the node itself. + */ +static HTAAServer *HTAAServer_new(const char *hostname, + int portnumber, + int IsProxy) +{ + HTAAServer *server; + + if ((server = typecalloc(HTAAServer)) == 0) + outofmem(__FILE__, "HTAAServer_new"); + + assert(server != NULL); + + server->hostname = NULL; + server->portnumber = (portnumber > 0 ? portnumber : 80); + server->IsProxy = (BOOLEAN) IsProxy; + server->setups = HTList_new(); + server->realms = HTList_new(); + + if (hostname) + StrAllocCopy(server->hostname, hostname); + + if (!server_table) + server_table = HTList_new(); + + HTList_addObject(server_table, (void *) server); + + return server; +} + +/* static HTAAServer_delete() + * + * DELETE THE ENTRY FOR THE SERVER FROM THE HOST TABLE, + * AND FREE THE MEMORY USED BY IT. + * + * ON ENTRY: + * killme points to the HTAAServer to be freed. + * + * ON EXIT: + * returns nothing. 
+ */ +static void HTAAServer_delete(HTAAServer *killme) +{ + int n, i; + HTAASetup *setup; + HTAARealm *realm; + HTList *cur; + + if (killme) { + if (killme->setups != NULL) { + n = HTList_count(killme->setups); + for (i = (n - 1); i >= 0; i--) { + if ((setup = (HTAASetup *) HTList_objectAt(killme->setups, + i)) != NULL) { + HTAASetup_delete(setup); + setup = NULL; + } + } + HTList_delete(killme->setups); + killme->setups = NULL; + } + + cur = killme->realms; + while (NULL != (realm = (HTAARealm *) HTList_nextObject(cur))) { + FREE(realm->realmname); + FREE(realm->username); + FREE(realm->password); + FREE(realm); + } + HTList_delete(killme->realms); + killme->realms = NULL; + + FREE(killme->hostname); + + HTList_removeObject(server_table, (void *) killme); + FREE(killme); + } +} + +/* static HTAAServer_lookup() + * LOOK UP SERVER BY HOSTNAME AND PORTNUMBER + * ON ENTRY: + * hostname obvious. + * portnumber if non-positive defaults to 80. + * IsProxy should be TRUE if this is a proxy. + * + * Looks up the server in the module-global server_table. + * + * ON EXIT: + * returns pointer to a HTAAServer structure + * representing the looked-up server. + * NULL, if not found. + */ +static HTAAServer *HTAAServer_lookup(const char *hostname, + int portnumber, + int IsProxy) +{ + if (hostname) { + HTList *cur = server_table; + HTAAServer *server; + + if (portnumber <= 0) + portnumber = 80; + + while (NULL != (server = (HTAAServer *) HTList_nextObject(cur))) { + if (server->portnumber == portnumber && + 0 == strcmp(server->hostname, hostname) && + server->IsProxy == IsProxy) + return server; + } + } + return NULL; /* NULL parameter, or not found */ +} + +/*************************** HTAASetup *******************************/ + +/* static HTAASetup_lookup() + * FIGURE OUT WHICH AUTHENTICATION SETUP THE SERVER + * IS USING FOR A GIVEN FILE ON A GIVEN HOST AND PORT + * + * ON ENTRY: + * hostname is the name of the server host machine. 
+ * portnumber is the port that the server is running in. + * docname is the (URL-)pathname of the document we + * are trying to access. + * IsProxy should be TRUE if this is a proxy. + * + * This function goes through the information known about + * all the setups of the server, and finds out if the given + * filename resides in one of the protected directories. + * + * ON EXIT: + * returns NULL if no match. + * Otherwise, a HTAASetup structure representing + * the protected server setup on the corresponding + * document tree. + * + */ +static HTAASetup *HTAASetup_lookup(const char *hostname, + int portnumber, + const char *docname, + int IsProxy) +{ + HTAAServer *server; + HTAASetup *setup; + + if (portnumber <= 0) + portnumber = 80; + + if (hostname && docname && *hostname && *docname && + NULL != (server = HTAAServer_lookup(hostname, + portnumber, + IsProxy))) { + + HTList *cur = server->setups; + + CTRACE((tfp, "%s %s (%s:%d:%s)\n", + "HTAASetup_lookup: resolving setup for", + (IsProxy ? "proxy" : "server"), + hostname, portnumber, docname)); + + while (NULL != (setup = (HTAASetup *) HTList_nextObject(cur))) { + if (HTAA_templateMatch(setup->ctemplate, docname)) { + CTRACE((tfp, "%s `%s' %s `%s'\n", + "HTAASetup_lookup:", docname, + "matched template", setup->ctemplate)); + return setup; + } else { + CTRACE((tfp, "%s `%s' %s `%s'\n", + "HTAASetup_lookup:", docname, + "did NOT match template", setup->ctemplate)); + } + } /* while setups remain */ + } + /* if valid parameters and server found */ + CTRACE((tfp, "%s `%s' %s\n", + "HTAASetup_lookup: No template matched", + NONNULL(docname), + "(so probably not protected)")); + + return NULL; /* NULL in parameters, or not found */ +} + +/* static HTAASetup_new() + * CREATE A NEW SETUP NODE + * ON ENTRY: + * server is a pointer to a HTAAServer structure + * to which this setup belongs. + * ctemplate documents matching this template + * are protected according to this setup. 
+ * valid_schemes a list containing all valid authentication + * schemes for this setup. + * If NULL, all schemes are disallowed. + * scheme_specifics is an array of assoc lists, which + * contain scheme specific parameters given + * by server in Authenticate: fields. + * If NULL, all scheme specifics are + * set to NULL. + * ON EXIT: + * returns a new HTAASetup node, and also adds it as + * part of the HTAAServer given as parameter. + */ +static HTAASetup *HTAASetup_new(HTAAServer *server, char *ctemplate, + HTList *valid_schemes, + HTAssocList **scheme_specifics) +{ + HTAASetup *setup; + + if (!server || isEmpty(ctemplate)) + return NULL; + + if ((setup = typecalloc(HTAASetup)) == 0) + outofmem(__FILE__, "HTAASetup_new"); + + assert(setup != NULL); + + setup->retry = NO; + setup->server = server; + setup->ctemplate = NULL; + if (ctemplate) + StrAllocCopy(setup->ctemplate, ctemplate); + setup->valid_schemes = valid_schemes; + setup->scheme_specifics = scheme_specifics; + + HTList_addObject(server->setups, (void *) setup); + + return setup; +} + +/* static HTAASetup_delete() + * FREE A HTAASetup STRUCTURE + * ON ENTRY: + * killme is a pointer to the structure to free(). + * + * ON EXIT: + * returns nothing. + */ +static void HTAASetup_delete(HTAASetup * killme) +{ + int scheme; + + if (killme) { + FREE(killme->ctemplate); + if (killme->valid_schemes) { + HTList_delete(killme->valid_schemes); + killme->valid_schemes = NULL; + } + for (scheme = 0; scheme < HTAA_MAX_SCHEMES; scheme++) + if (killme->scheme_specifics[scheme]) + HTAssocList_delete(killme->scheme_specifics[scheme]); + FREE(killme->scheme_specifics); + FREE(killme); + } +} + +/* static HTAASetup_updateSpecifics() + * COPY SCHEME SPECIFIC PARAMETERS + * TO HTAASetup STRUCTURE + * ON ENTRY: + * setup destination setup structure. + * specifics string array containing scheme + * specific parameters for each scheme. + * If NULL, all the scheme specific + * parameters are set to NULL. 
+ * + * ON EXIT: + * returns nothing. + */ +static void HTAASetup_updateSpecifics(HTAASetup * setup, HTAssocList **specifics) +{ + int scheme; + + if (setup) { + if (setup->scheme_specifics) { + for (scheme = 0; scheme < HTAA_MAX_SCHEMES; scheme++) { + if (setup->scheme_specifics[scheme]) + HTAssocList_delete(setup->scheme_specifics[scheme]); + } + FREE(setup->scheme_specifics); + } + setup->scheme_specifics = specifics; + } +} + +/*************************** HTAARealm **********************************/ + +/* static HTAARealm_lookup() + * LOOKUP HTAARealm STRUCTURE BY REALM NAME + * ON ENTRY: + * realm_table a list of realm objects. + * realmname is the name of realm to look for. + * + * ON EXIT: + * returns the realm. NULL, if not found. + */ +static HTAARealm *HTAARealm_lookup(HTList *realm_table, + const char *realmname) +{ + if (realm_table && realmname) { + HTList *cur = realm_table; + HTAARealm *realm; + + while (NULL != (realm = (HTAARealm *) HTList_nextObject(cur))) { + if (0 == strcmp(realm->realmname, realmname)) + return realm; + } + } + return NULL; /* No table, NULL param, or not found */ +} + +/* static HTAARealm_new() + * CREATE A NODE CONTAINING USERNAME AND + * PASSWORD USED FOR THE GIVEN REALM. + * IF REALM ALREADY EXISTS, CHANGE + * USERNAME/PASSWORD. + * ON ENTRY: + * realm_table a list of realms to where to add + * the new one, too. + * realmname is the name of the password domain. + * username and + * password are what you can expect them to be. + * + * ON EXIT: + * returns the created realm. 
+ */ +static HTAARealm *HTAARealm_new(HTList *realm_table, + const char *realmname, + const char *username, + const char *password) +{ + HTAARealm *realm; + + realm = HTAARealm_lookup(realm_table, realmname); + + if (!realm) { + if ((realm = typecalloc(HTAARealm)) == 0) + outofmem(__FILE__, "HTAARealm_new"); + + assert(realm != NULL); + + realm->realmname = NULL; + realm->username = NULL; + realm->password = NULL; + StrAllocCopy(realm->realmname, realmname); + if (realm_table) + HTList_addObject(realm_table, (void *) realm); + } + if (username) + StrAllocCopy(realm->username, username); + if (password) + StrAllocCopy(realm->password, password); + + return realm; +} + +/***************** Basic and Pubkey Authentication ************************/ + +/* static compose_auth_string() + * + * COMPOSE Basic OR Pubkey AUTHENTICATION STRING; + * PROMPTS FOR USERNAME AND PASSWORD IF NEEDED + * + * ON ENTRY: + * scheme is either HTAA_BASIC or HTAA_PUBKEY. + * setup is the current server setup. + * IsProxy should be TRUE if this is a proxy. + * + * ON EXIT: + * returns a newly composed authorization string, + * (with, of course, a newly generated secret + * key and fresh timestamp, if Pubkey-scheme + * is being used). + * NULL, if something fails. + * NOTE: + * Like throughout the entire AA package, no string or structure + * returned by AA package needs to (or should) be freed. + * + */ +static char *compose_auth_string(HTAAScheme scheme, HTAASetup * setup, int IsProxy) +{ + char *cleartext = NULL; /* Cleartext presentation */ + char *ciphertext = NULL; /* Encrypted presentation */ + size_t len; + char *msg = NULL; + char *username = NULL; + char *password = NULL; + char *realmname = NULL; + char *theHost = NULL; + char *proxiedHost = NULL; + char *thePort = NULL; + HTAARealm *realm; + const char *i_net_addr = "0.0.0.0"; /* Change... @@@@ */ + const char *timestamp = "42"; /* ... 
these @@@@ */ + + FREE(compose_auth_stringResult); /* From previous call */ + + if ((scheme != HTAA_BASIC && scheme != HTAA_PUBKEY) || !setup || + !setup->scheme_specifics || !setup->scheme_specifics[scheme] || + !setup->server || !setup->server->realms) + return NULL; + + realmname = HTAssocList_lookup(setup->scheme_specifics[scheme], "realm"); + if (!realmname) + return NULL; + + realm = HTAARealm_lookup(setup->server->realms, realmname); + if (!(realm && + realm->username && *realm->username && + realm->password) || setup->retry) { + if (!realm) { + CTRACE((tfp, "%s `%s' %s\n", + "compose_auth_string: realm:", realmname, + "not found -- creating")); + realm = HTAARealm_new(setup->server->realms, + realmname, NULL, NULL); + } + /* + * The template should be either the '*' global for everything on the + * server (always true for proxy authorization setups), or a path for + * the start of a protected limb, with no host field, but we'll check + * for a host anyway in case a WWW-Protection-Template header set an + * absolute URL instead of a path. If we do get a host from this, it + * will include the port. - FM + */ + if ((!IsProxy) && using_proxy && setup->ctemplate) { + proxiedHost = HTParse(setup->ctemplate, "", PARSE_HOST); + if (proxiedHost && *proxiedHost != '\0') { + theHost = proxiedHost; + } + } + /* + * If we didn't get a host field from the template, set up the host + * name and port from the setup->server elements. - FM + */ + if (!theHost) + theHost = setup->server->hostname; + if (setup->server->portnumber > 0 && + setup->server->portnumber != 80) { + HTSprintf0(&thePort, ":%d", setup->server->portnumber); + } + /* + * Set up the message for the username prompt, and then issue the + * prompt. The default username is included in the call to the + * prompting function, but the password is NULL-ed and always replaced. + * - FM + */ + HTSprintf0(&msg, gettext("Username for '%s' at %s '%s%s':"), + realm->realmname, + (IsProxy ? 
"proxy" : "server"), + (theHost ? theHost : "??"), + NonNull(thePort)); + FREE(proxiedHost); + FREE(thePort); + username = realm->username; + password = NULL; + HTPromptUsernameAndPassword(msg, &username, &password, IsProxy); + + FREE(msg); + FREE(realm->username); + FREE(realm->password); + realm->username = username; + realm->password = password; + + if (!realm->username || !realm->password) { + /* + * Signals to retry. - FM + */ + return NULL; + } else if (*realm->username == '\0') { + /* + * Signals to abort. - FM + */ + StrAllocCopy(compose_auth_stringResult, ""); + return compose_auth_stringResult; + } + } + + len = (strlen(NonNull(realm->username)) + + strlen(NonNull(realm->password)) + 3); + + if (scheme == HTAA_PUBKEY) { +#ifdef PUBKEY + /* Generate new secret key */ + StrAllocCopy(secret_key, HTAA_generateRandomKey()); +#endif /* PUBKEY */ + /* Room for secret key, timestamp and inet address */ + len += strlen(NonNull(secret_key)) + 30; + } else { + FREE(secret_key); + } + + if ((cleartext = typecallocn(char, len)) == 0) + outofmem(__FILE__, "compose_auth_string"); + + assert(cleartext != NULL); + + if (realm->username) + strcpy(cleartext, realm->username); + else + *cleartext = '\0'; + + strcat(cleartext, ":"); + + if (realm->password) + strcat(cleartext, realm->password); + + if (scheme == HTAA_PUBKEY) { + strcat(cleartext, ":"); + strcat(cleartext, i_net_addr); + strcat(cleartext, ":"); + strcat(cleartext, timestamp); + strcat(cleartext, ":"); + if (secret_key) + strcat(cleartext, secret_key); + + if (!((ciphertext = typecallocn(char, 2 * len)) && + (compose_auth_stringResult = typecallocn(char, 3 * len)))) + outofmem(__FILE__, "compose_auth_string"); + +#ifdef PUBKEY + HTPK_encrypt(cleartext, ciphertext, server->public_key); + HTUU_encode((unsigned char *) ciphertext, strlen(ciphertext), + compose_auth_stringResult); +#endif /* PUBKEY */ + FREE(cleartext); + FREE(ciphertext); + } else { /* scheme == HTAA_BASIC */ + if (!(compose_auth_stringResult = + 
typecallocn(char, (4 * ((len + 2) / 3)) + 1))) + outofmem(__FILE__, "compose_auth_string"); + + HTUU_encode((unsigned char *) cleartext, strlen(cleartext), + compose_auth_stringResult); + FREE(cleartext); + } + return compose_auth_stringResult; +} + +/* BROWSER static HTAA_selectScheme() + * SELECT THE AUTHENTICATION SCHEME TO USE + * ON ENTRY: + * setup is the server setup structure which can + * be used to make the decision about the + * used scheme. + * + * When new authentication methods are added to library + * this function makes the decision about which one to + * use at a given time. This can be done by inspecting + * environment variables etc. + * + * Currently only searches for the first valid scheme, + * and if nothing found suggests Basic scheme; + * + * ON EXIT: + * returns the authentication scheme to use. + */ +static HTAAScheme HTAA_selectScheme(HTAASetup * setup) +{ + int scheme; + + if (setup && setup->valid_schemes) { + for (scheme = HTAA_BASIC; scheme < HTAA_MAX_SCHEMES; scheme++) { + void *object = (void *) scheme; + + if (-1 < HTList_indexOf(setup->valid_schemes, object)) + return (HTAAScheme) scheme; + } + } + return HTAA_BASIC; +} + +/* + * Purpose: Free off all module globals. + * Arguments: void + * Return Value: void + * Remarks/Portability/Dependencies/Restrictions: + * To be used at program exit. 
+ * Revision History: + * 06-19-96 created - FM + */ +static void free_HTAAGlobals(void) +{ + HTAAServer *server; + int n, i; + + if (server_table != NULL) { + n = HTList_count(server_table); + for (i = (n - 1); i >= 0; i--) { + if ((server = (HTAAServer *) HTList_objectAt(server_table, + i)) != NULL) { + HTAAServer_delete(server); + server = NULL; + } + } + HTList_delete(server_table); + server_table = NULL; + } + + HTAAForwardAuth_reset(); + FREE(HTAA_composeAuthResult); + FREE(current_hostname); + FREE(current_docname); + FREE(proxy_hostname); + FREE(proxy_docname); + FREE(compose_auth_stringResult); + FREE(secret_key); +} + +/* BROWSER PUBLIC HTAA_composeAuth() + * + * SELECT THE AUTHENTICATION SCHEME AND + * COMPOSE THE ENTIRE AUTHORIZATION HEADER LINE + * IF WE ALREADY KNOW THAT THE HOST REQUIRES AUTHENTICATION + * + * ON ENTRY: + * hostname is the hostname of the server. + * portnumber is the portnumber in which the server runs. + * docname is the pathname of the document (as in URL) + * IsProxy should be TRUE if this is a proxy. + * + * ON EXIT: + * returns NULL, if no authorization seems to be needed, or + * if it is the entire Authorization: line, e.g. + * + * "Authorization: Basic username:password" + * + * As usual, this string is automatically freed. + */ +char *HTAA_composeAuth(const char *hostname, + const int portnumber, + const char *docname, + int IsProxy) +{ + char *auth_string; + BOOL retry; + HTAAScheme scheme; + size_t len; + + /* + * Setup atexit() freeing if not done already. - FM + */ + if (!free_HTAAGlobalsSet) { +#ifdef LY_FIND_LEAKS + atexit(free_HTAAGlobals); +#endif + free_HTAAGlobalsSet = TRUE; + } + + /* + * Make gateway httpds pass authorization field as it was received. 
(This + * still doesn't really work because Authenticate: headers from remote + * server are not forwarded to client yet so it cannot really know that it + * should send authorization; I will not implement it yet because I feel we + * will soon change radically the way requests are represented to allow + * multithreading on server-side. Life is hard.) + */ + if (HTAAForwardAuth) { + CTRACE((tfp, "HTAA_composeAuth: %s\n", + "Forwarding received authorization")); + StrAllocCopy(HTAA_composeAuthResult, HTAAForwardAuth); + HTAAForwardAuth_reset(); /* Just a precaution */ + return HTAA_composeAuthResult; + } + + FREE(HTAA_composeAuthResult); /* From previous call */ + + if (IsProxy) { + /* + * Proxy Authorization required. - AJL + */ + + CTRACE((tfp, "Composing Proxy Authorization for %s:%d/%s\n", + hostname, portnumber, docname)); + + if (proxy_portnumber != portnumber || + !proxy_hostname || !proxy_docname || + !hostname || !docname || + 0 != strcmp(proxy_hostname, hostname) || + 0 != strcmp(proxy_docname, docname)) { + + retry = NO; + + proxy_portnumber = portnumber; + + if (hostname) + StrAllocCopy(proxy_hostname, hostname); + else + FREE(proxy_hostname); + + if (docname) + StrAllocCopy(proxy_docname, docname); + else + FREE(proxy_docname); + } else { + retry = YES; + } + + if (!proxy_setup || !retry) + proxy_setup = HTAASetup_lookup(hostname, portnumber, + docname, IsProxy); + + if (!proxy_setup) + return NULL; + + switch (scheme = HTAA_selectScheme(proxy_setup)) { + case HTAA_BASIC: + case HTAA_PUBKEY: + auth_string = compose_auth_string(scheme, proxy_setup, IsProxy); + break; + case HTAA_KERBEROS_V4: + /* OTHER AUTHENTICATION ROUTINES ARE CALLED HERE */ + default: + { + char *msg = NULL; + + HTSprintf0(&msg, "%s `%s'", + gettext("This client doesn't know how to compose proxy authorization information for scheme"), + HTAAScheme_name(scheme)); + HTAlert(msg); + FREE(msg); + auth_string = NULL; + } + } /* switch scheme */ + + proxy_setup->retry = NO; + + if 
(!auth_string) + /* + * Signal a failure. - FM + */ + return NULL; /* Added by marca. */ + if (*auth_string == '\0') { + /* + * Signal an abort. - FM + */ + StrAllocCopy(HTAA_composeAuthResult, ""); + return (HTAA_composeAuthResult); + } + len = strlen(auth_string) + strlen(HTAAScheme_name(scheme)) + 26; + if ((HTAA_composeAuthResult = typecallocn(char, len)) == 0) + outofmem(__FILE__, "HTAA_composeAuth"); + + assert(HTAA_composeAuthResult != NULL); + + strcpy(HTAA_composeAuthResult, "Proxy-Authorization: "); + + } else { + /* + * Normal WWW authorization. + */ + CTRACE((tfp, "Composing Authorization for %s:%d/%s\n", + hostname, portnumber, docname)); + + if (current_portnumber != portnumber || + !current_hostname || !current_docname || + !hostname || !docname || + 0 != strcmp(current_hostname, hostname) || + 0 != strcmp(current_docname, docname)) { + + retry = NO; + + current_portnumber = portnumber; + + if (hostname) + StrAllocCopy(current_hostname, hostname); + else + FREE(current_hostname); + + if (docname) + StrAllocCopy(current_docname, docname); + else + FREE(current_docname); + } else { + retry = YES; + } + + if (!current_setup || !retry) + current_setup = HTAASetup_lookup(hostname, portnumber, + docname, IsProxy); + + if (!current_setup) + return NULL; + + switch (scheme = HTAA_selectScheme(current_setup)) { + case HTAA_BASIC: + case HTAA_PUBKEY: + auth_string = compose_auth_string(scheme, current_setup, IsProxy); + break; + case HTAA_KERBEROS_V4: + /* OTHER AUTHENTICATION ROUTINES ARE CALLED HERE */ + default: + { + char *msg = 0; + + HTSprintf0(&msg, "%s `%s'", + gettext("This client doesn't know how to compose authorization information for scheme"), + HTAAScheme_name(scheme)); + HTAlert(msg); + FREE(msg); + auth_string = NULL; + } + } /* switch scheme */ + + current_setup->retry = NO; + + if (!auth_string) + /* + * Signal a failure. - FM + */ + return NULL; /* Added by marca. */ + if (*auth_string == '\0') { + /* + * Signal an abort. 
- FM + */ + StrAllocCopy(HTAA_composeAuthResult, ""); + return (HTAA_composeAuthResult); + } + + len = strlen(auth_string) + strlen(HTAAScheme_name(scheme)) + 20; + if ((HTAA_composeAuthResult = typecallocn(char, len)) == 0) + outofmem(__FILE__, "HTAA_composeAuth"); + + assert(HTAA_composeAuthResult != NULL); + + strcpy(HTAA_composeAuthResult, "Authorization: "); + } + + strcat(HTAA_composeAuthResult, HTAAScheme_name(scheme)); + strcat(HTAA_composeAuthResult, " "); + strcat(HTAA_composeAuthResult, auth_string); + return HTAA_composeAuthResult; +} + +/* BROWSER PUBLIC HTAA_shouldRetryWithAuth() + * + * DETERMINES IF WE SHOULD RETRY THE SERVER + * WITH AUTHORIZATION + * (OR IF ALREADY RETRIED, WITH A DIFFERENT + * USERNAME AND/OR PASSWORD (IF MISSPELLED)) + * ON ENTRY: + * start_of_headers is the first block already read from socket, + * but status line skipped; i.e., points to the + * start of the header section. + * length is the remaining length of the first block. + * soc is the socket to read the rest of server reply. + * IsProxy should be TRUE if this is a proxy. + * + * This function should only be called when + * server has replied with a 401 (Unauthorized) + * status code. + * ON EXIT: + * returns YES, if connection should be retried. + * The node containing all the necessary + * information is + * * either constructed if it does not exist + * * or password is reset to NULL to indicate + * that username and password should be + * reprompted when composing Authorization: + * field (in function HTAA_composeAuth()). + * NO, otherwise. + */ +BOOL HTAA_shouldRetryWithAuth(char *start_of_headers, + size_t length, + int soc, + int IsProxy) +{ + HTAAScheme scheme; + char *line = NULL; + int num_schemes = 0; + HTList *valid_schemes = HTList_new(); + HTAssocList **scheme_specifics = NULL; + char *ctemplate = NULL; + char *temp = NULL; + + /* + * Setup atexit() freeing if not done already. 
- FM + */ + if (!free_HTAAGlobalsSet) { +#ifdef LY_FIND_LEAKS + atexit(free_HTAAGlobals); +#endif + free_HTAAGlobalsSet = TRUE; + } + + /* + * Read server reply header lines + */ + CTRACE((tfp, "Server reply header lines:\n")); + + HTAA_setupReader(start_of_headers, length, soc); + while (NULL != (line = HTAA_getUnfoldedLine()) && *line != '\0') { + CTRACE((tfp, "%s\n", line)); + + if (strchr(line, ':')) { /* Valid header line */ + + char *p = line; + char *fieldname = HTNextField(&p); + char *arg1 = HTNextField(&p); + char *args = p; + + if ((IsProxy && + 0 == strcasecomp(fieldname, "Proxy-Authenticate:")) || + (!IsProxy && + 0 == strcasecomp(fieldname, "WWW-Authenticate:"))) { + if (isEmpty(arg1) || isEmpty(args)) { + HTSprintf0(&temp, gettext("Invalid header '%s%s%s%s%s'"), line, + (non_empty(arg1) ? " " : ""), + NonNull(arg1), + (non_empty(args) ? " " : ""), + NonNull(args)); + HTAlert(temp); + FREE(temp); + } else if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) { + HTList_addObject(valid_schemes, (void *) scheme); + if (!scheme_specifics) { + int i; + + scheme_specifics = + typecallocn(HTAssocList *, HTAA_MAX_SCHEMES); + + if (!scheme_specifics) + outofmem(__FILE__, "HTAA_shouldRetryWithAuth"); + + assert(scheme_specifics != NULL); + + for (i = 0; i < HTAA_MAX_SCHEMES; i++) + scheme_specifics[i] = NULL; + } + scheme_specifics[scheme] = HTAA_parseArgList(args); + num_schemes++; + } else { + CTRACE((tfp, "Unknown scheme `%s' %s\n", + NONNULL(arg1), + (IsProxy ? + "in Proxy-Authenticate: field" : + "in WWW-Authenticate: field"))); + } + } + + else if (!IsProxy && + 0 == strcasecomp(fieldname, "WWW-Protection-Template:")) { + CTRACE((tfp, "Protection template set to `%s'\n", arg1)); + StrAllocCopy(ctemplate, arg1); + } + + } else { + CTRACE((tfp, "Invalid header line `%s' ignored\n", line)); + } + + FREE(line); + } /* while header lines remain */ + FREE(line); + + /* + * So should we retry with authorization? 
+ */ + if (IsProxy) { + if (num_schemes == 0) { + /* + * No proxy authorization valid + */ + proxy_setup = NULL; + return NO; + } + /* + * Doing it for proxy. -AJL + */ + if (proxy_setup && proxy_setup->server) { + /* + * We have already tried with proxy authorization. Either we don't + * have access or username or password was misspelled. + * + * Update scheme-specific parameters (in case they have expired by + * chance). + */ + HTAASetup_updateSpecifics(proxy_setup, scheme_specifics); + + if (NO == HTConfirm(AUTH_FAILED_PROMPT)) { + proxy_setup = NULL; + return NO; + } else { + /* + * Re-ask username+password (if misspelled). + */ + proxy_setup->retry = YES; + return YES; + } + } else { + /* + * proxy_setup == NULL, i.e., we have a first connection to a + * protected server or the server serves a wider set of documents + * than we expected so far. + */ + HTAAServer *server = HTAAServer_lookup(proxy_hostname, + proxy_portnumber, + IsProxy); + + if (!server) { + server = HTAAServer_new(proxy_hostname, + proxy_portnumber, + IsProxy); + } + if (!ctemplate) /* Proxy matches everything -AJL */ + StrAllocCopy(ctemplate, "*"); + proxy_setup = HTAASetup_new(server, + ctemplate, + valid_schemes, + scheme_specifics); + FREE(ctemplate); + + HTAlert(gettext("Proxy authorization required -- retrying")); + return YES; + } + /* Never reached */ + } + /* + * Normal WWW authorization. + */ + if (num_schemes == 0) { + /* + * No authorization valid. + */ + current_setup = NULL; + return NO; + } + if (current_setup && current_setup->server) { + /* + * So we have already tried with WWW authorization. Either we don't + * have access or username or password was misspelled. + * + * Update scheme-specific parameters (in case they have expired by + * chance). + */ + HTAASetup_updateSpecifics(current_setup, scheme_specifics); + + if (NO == HTConfirm(AUTH_FAILED_PROMPT)) { + current_setup = NULL; + return NO; + } else { + /* + * Re-ask username+password (if misspelled). 
+ */ + current_setup->retry = YES; + return YES; + } + } else { + /* + * current_setup == NULL, i.e., we have a first connection to a + * protected server or the server serves a wider set of documents than + * we expected so far. + */ + HTAAServer *server = HTAAServer_lookup(current_hostname, + current_portnumber, + IsProxy); + + if (!server) { + server = HTAAServer_new(current_hostname, + current_portnumber, + IsProxy); + } + if (!ctemplate) + ctemplate = HTAA_makeProtectionTemplate(current_docname); + current_setup = HTAASetup_new(server, + ctemplate, + valid_schemes, + scheme_specifics); + FREE(ctemplate); + + HTAlert(gettext("Access without authorization denied -- retrying")); + return YES; + } + /* Never reached */ +} + +/* + * This function clears all authorization information by + * invoking the free_HTAAGlobals() function, which normally + * is invoked at exit. It allows a browser command to do + * this at any time, for example, if the user is leaving + * the terminal for a period of time, but does not want + * to end the current session. - FM + */ +void HTClearHTTPAuthInfo(void) +{ + /* + * Need code to check cached documents against the protection templates, + * and do something to ensure that any protected documents no longer can be + * accessed without a new retrieval. - FM + */ + + /* + * Now free all of the authorization info, and reset the + * free_HTAAGlobalsSet flag. - FM + */ + free_HTAAGlobals(); + free_HTAAGlobalsSet = FALSE; +} diff --git a/WWW/Library/Implementation/HTAABrow.h b/WWW/Library/Implementation/HTAABrow.h new file mode 100644 index 00000000..064f11e3 --- /dev/null +++ b/WWW/Library/Implementation/HTAABrow.h @@ -0,0 +1,137 @@ +/* + * $LynxId: HTAABrow.h,v 1.16 2010/10/27 00:13:53 tom Exp $ + * + * BROWSER SIDE ACCESS AUTHORIZATION MODULE + + This module is the browser side interface to Access Authorization (AA) package. It + contains code only for browser. 
+ + Important to know about memory allocation: + + Routines in this module use dynamic allocation, but free automatically all the memory + reserved by them. + + Therefore the caller never has to (and never should) free() any object returned by + these functions. + + Therefore also all the strings returned by this package are only valid until the next + call to the same function is made. This approach is selected, because of the nature of + access authorization: no string returned by the package needs to be valid longer than + until the next call. + + This also makes it easy to plug the AA package in: you don't have to ponder whether to + free() something here or is it done somewhere else (because it is always done somewhere + else). + + The strings that the package needs to store are copied so the original strings given as + parameters to AA functions may be freed or modified with no side effects. + + Also note: The AA package does not free() anything else than what it has itself + allocated. + + */ + +#ifndef HTAABROW_H +#define HTAABROW_H + +#include <HTAAUtil.h> /* Common parts of AA */ + +#ifdef __cplusplus +extern "C" { +#endif +/* + Routines for Browser Side Recording of AA Info + + Most of the browser-side AA is done by the following two functions (which are called + from file HTTP.c so the browsers using libwww only need to be linked with the new + library and not be changed at all): + + HTAA_composeAuth() composes the Authorization: line contents, if the AA package + thinks that the given document is protected. Otherwise this function returns NULL. + This function also calls the functions HTPrompt(), HTPromptPassword() and HTConfirm() + to get the username, password and some confirmation from the user. + + HTAA_shouldRetryWithAuth() determines whether to retry the request with AA or with a + new AA (in case username or password was misspelled).
+ + */ +/* PUBLIC HTAA_composeAuth() + * + * COMPOSE THE ENTIRE AUTHORIZATION HEADER LINE IF WE + * ALREADY KNOW, THAT THE HOST MIGHT REQUIRE AUTHORIZATION + * + * ON ENTRY: + * hostname is the hostname of the server. + * portnumber is the portnumber in which the server runs. + * docname is the pathname of the document (as in URL) + * + * ON EXIT: + * returns NULL, if no authorization seems to be needed, or + * if it is the entire Authorization: line, e.g. + * + * "Authorization: basic username:password" + * + * As usual, this string is automatically freed. + */ + extern char *HTAA_composeAuth(const char *hostname, + const int portnumber, + const char *docname, + int IsProxy); + +/* BROWSER PUBLIC HTAA_shouldRetryWithAuth() + * + * DETERMINES IF WE SHOULD RETRY THE SERVER + * WITH AUTHORIZATION + * (OR IF ALREADY RETRIED, WITH A DIFFERENT + * USERNAME AND/OR PASSWORD (IF MISSPELLED)) + * ON ENTRY: + * start_of_headers is the first block already read from socket, + * but status line skipped; i.e., points to the + * start of the header section. + * length is the remaining length of the first block. + * soc is the socket to read the rest of server reply. + * + * This function should only be called when + * server has replied with a 401 (Unauthorized) + * status code. + * ON EXIT: + * returns YES, if connection should be retried. + * The node containing all the necessary + * information is + * * either constructed if it does not exist + * * or password is reset to NULL to indicate + * that username and password should be + * reprompted when composing Authorization: + * field (in function HTAA_composeAuth()). + * NO, otherwise. + */ + extern BOOL HTAA_shouldRetryWithAuth(char *start_of_headers, + size_t length, + int soc, + int IsProxy); + +/* + * Function to allow clearing of all Authorization info + * via a browser command. 
- FM + */ + extern void HTClearHTTPAuthInfo(void); + +/* + +Enabling Gateway httpds to Forward Authorization + + These functions should only be called from daemon code, and HTAAForwardAuth_reset() + must be called before the next request is handled to make sure that authorization + string isn't cached in daemon so that other people can access private files using + somebody else's previous authorization information. + + */ + + extern void HTAAForwardAuth_set(const char *scheme_name, + const char *scheme_specifics); + extern void HTAAForwardAuth_reset(void); + +#ifdef __cplusplus +} +#endif +#endif /* NOT HTAABROW_H */ diff --git a/WWW/Library/Implementation/HTAAProt.c b/WWW/Library/Implementation/HTAAProt.c new file mode 100644 index 00000000..45f654a3 --- /dev/null +++ b/WWW/Library/Implementation/HTAAProt.c @@ -0,0 +1,742 @@ +/* + * $LynxId: HTAAProt.c,v 1.32 2010/04/29 09:30:57 tom Exp $ + * + * MODULE HTAAProt.c + * PROTECTION FILE PARSING MODULE + * + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * MD Mark Donszelmann duns@vxdeop.cern.ch + * + * HISTORY: + * 20 Oct 93 AL Now finds uid/gid for nobody/nogroup by name + * (doesn't use default 65534 right away). + * Also understands negative uids/gids. + * 14 Nov 93 MD Added VMS compatibility + * + * BUGS: + * + * + */ + +#include <HTUtils.h> + +#ifndef VMS +#ifndef NOUSERS +#include <pwd.h> /* Unix password file routine: getpwnam() */ +#include <grp.h> /* Unix group file routine: getgrnam() */ +#endif /* NOUSERS */ +#endif /* not VMS */ + +#include <HTAAUtil.h> +#include <HTLex.h> /* Lexical analysor */ +#include <HTAAProt.h> /* Implemented here */ + +#include <LYUtils.h> +#include <LYLeaks.h> + +#define NOBODY 65534 /* -2 in 16-bit environment */ +#define NONESUCH 65533 /* -3 in 16-bit environment */ + +/* + * Protection setup caching + */ +typedef struct { + char *prot_filename; + HTAAProt *prot; +} HTAAProtCache; + +static HTList *prot_cache = NULL; /* Protection setup cache. 
*/ +static HTAAProt *default_prot = NULL; /* Default protection. */ +static HTAAProt *current_prot = NULL; /* Current protection mode */ + + /* which is set up by callbacks */ + /* from the rule system when */ + /* a "protect" rule is matched. */ + +#ifndef NOUSERS +/* static isNumber() + * DOES A CHARACTER STRING REPRESENT A NUMBER + */ +static BOOL isNumber(const char *s) +{ + const char *cur = s; + + if (isEmpty(s)) + return NO; + + if (*cur == '-') + cur++; /* Allow initial minus sign in a number */ + + while (*cur) { + if (*cur < '0' || *cur > '9') + return NO; + cur++; + } + return YES; +} + +/* PUBLIC HTAA_getUid() + * GET THE USER ID TO CHANGE THE PROCESS UID TO + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns the uid number to give to setuid() system call. + * Default is 65534 (nobody). + */ +int HTAA_getUid(void) +{ + int uid; + + if (current_prot && current_prot->uid_name) { + if (isNumber(current_prot->uid_name)) { + uid = atoi(current_prot->uid_name); + if ((*HTAA_UidToName(uid)) != '\0') { + return uid; + } + } else { /* User name (not a number) */ + if ((uid = HTAA_NameToUid(current_prot->uid_name)) != NONESUCH) { + return uid; + } + } + } + /* + * Ok, then let's get uid for nobody. + */ + if ((uid = HTAA_NameToUid("nobody")) != NONESUCH) { + return uid; + } + /* + * Ok, then use default. + */ + return NOBODY; /* nobody */ +} + +/* PUBLIC HTAA_getGid() + * GET THE GROUP ID TO CHANGE THE PROCESS GID TO + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns the uid number to give to setgid() system call. + * Default is 65534 (nogroup). 
+ */ +int HTAA_getGid(void) +{ + int gid; + + if (current_prot && current_prot->gid_name) { + if (isNumber(current_prot->gid_name)) { + gid = atoi(current_prot->gid_name); + if (*HTAA_GidToName(gid) != '\0') { + return gid; + } + } else { /* Group name (not number) */ + if ((gid = HTAA_NameToGid(current_prot->gid_name)) != NONESUCH) { + return gid; + } + } + } + /* + * Ok, then let's get gid for nogroup. + */ + if ((gid = HTAA_NameToGid("nogroup")) != NONESUCH) { + return gid; + } + /* + * Ok, then use default. + */ + return NOBODY; /* nogroup */ +} +#endif /* !NOUSERS */ + +/* static HTAA_setIds() + * SET UID AND GID (AS NAMES OR NUMBERS) + * TO HTAAProt STRUCTURE + * ON ENTRY: + * prot destination. + * ids is a string like "james.www" or "1422.69" etc. + * giving uid and gid. + * + * ON EXIT: + * returns nothing. + */ +static void HTAA_setIds(HTAAProt *prot, const char *ids) +{ + if (ids) { + char *local_copy = NULL; + char *point; + + StrAllocCopy(local_copy, ids); + point = strchr(local_copy, '.'); + if (point) { + *(point++) = (char) 0; + StrAllocCopy(prot->gid_name, point); + } else { + StrAllocCopy(prot->gid_name, "nogroup"); + } + StrAllocCopy(prot->uid_name, local_copy); + FREE(local_copy); + } else { + StrAllocCopy(prot->uid_name, "nobody"); + StrAllocCopy(prot->gid_name, "nogroup"); + } +} + +/* static HTAA_parseProtFile() + * PARSE A PROTECTION SETUP FILE AND + * PUT THE RESULT IN A HTAAProt STRUCTURE + * ON ENTRY: + * prot destination structure. + * fp open protection file. + * + * ON EXIT: + * returns nothing. 
+ */ +static void HTAA_parseProtFile(HTAAProt *prot, FILE *fp) +{ + if (prot && fp) { + LexItem lex_item; + char *fieldname = NULL; + + while (LEX_EOF != (lex_item = lex(fp))) { + + while (lex_item == LEX_REC_SEP) /* Ignore empty lines */ + lex_item = lex(fp); + + if (lex_item == LEX_EOF) /* End of file */ + break; + + if (lex_item == LEX_ALPH_STR) { /* Valid setup record */ + + StrAllocCopy(fieldname, HTlex_buffer); + + if (LEX_FIELD_SEP != (lex_item = lex(fp))) + unlex(lex_item); /* If someone wants to use colon */ + /* after field name it's ok, but */ + /* not required. Here we read it. */ + + if (0 == strncasecomp(fieldname, "Auth", 4)) { + lex_item = lex(fp); + while (lex_item == LEX_ALPH_STR) { + HTAAScheme scheme = HTAAScheme_enum(HTlex_buffer); + + if (scheme != HTAA_UNKNOWN) { + if (!prot->valid_schemes) + prot->valid_schemes = HTList_new(); + HTList_addObject(prot->valid_schemes, (void *) scheme); + CTRACE((tfp, "%s %s `%s'\n", + "HTAA_parseProtFile: valid", + "authentication scheme:", + HTAAScheme_name(scheme))); + } else { + CTRACE((tfp, "%s %s `%s'\n", + "HTAA_parseProtFile: unknown", + "authentication scheme:", + HTlex_buffer)); + } + + if (LEX_ITEM_SEP != (lex_item = lex(fp))) + break; + /* + * Here lex_item == LEX_ITEM_SEP; after item separator + * it is ok to have one or more newlines (LEX_REC_SEP) + * and they are ignored (continuation line). 
+ */ + do { + lex_item = lex(fp); + } while (lex_item == LEX_REC_SEP); + } /* while items in list */ + } + /* if "Authenticate" */ + else if (0 == strncasecomp(fieldname, "mask", 4)) { + prot->mask_group = HTAA_parseGroupDef(fp); + lex_item = LEX_REC_SEP; /*groupdef parser read this already */ + if (TRACE) { + if (prot->mask_group) { + fprintf(tfp, + "HTAA_parseProtFile: Mask group:\n"); + HTAA_printGroupDef(prot->mask_group); + } else + fprintf(tfp, + "HTAA_parseProtFile: Mask group syntax error\n"); + } + } + /* if "Mask" */ + else { /* Just a name-value pair, put it to assoclist */ + + if (LEX_ALPH_STR == (lex_item = lex(fp))) { + if (!prot->values) + prot->values = HTAssocList_new(); + HTAssocList_add(prot->values, fieldname, HTlex_buffer); + lex_item = lex(fp); /* Read record separator */ + CTRACE((tfp, "%s `%s' bound to value `%s'\n", + "HTAA_parseProtFile: Name", + fieldname, HTlex_buffer)); + } + } /* else name-value pair */ + + } + /* if valid field */ + if (lex_item != LEX_EOF && lex_item != LEX_REC_SEP) { + CTRACE((tfp, "%s %s %d (that line ignored)\n", + "HTAA_parseProtFile: Syntax error", + "in protection setup file at line", + HTlex_line)); + do { + lex_item = lex(fp); + } while (lex_item != LEX_EOF && lex_item != LEX_REC_SEP); + } /* if syntax error */ + } /* while not end-of-file */ + FREE(fieldname); + } /* if valid parameters */ +} + +/* static HTAAProt_new() + * ALLOCATE A NEW HTAAProt STRUCTURE AND + * INITIALIZE IT FROM PROTECTION SETUP FILE + * ON ENTRY: + * cur_docname current filename after rule translations. + * prot_filename protection setup file name. + * If NULL, not an error. + * ids Uid and gid names or numbers, + * examples: + * james ( <=> james.nogroup) + * .www ( <=> nobody.www) + * james.www + * james.69 + * 1422.69 + * 1422.www + * + * May be NULL, defaults to nobody.nogroup. + * Should be NULL, if prot_file is NULL. + * + * ON EXIT: + * returns returns a new and initialized protection + * setup structure. 
+ * If setup file is already read in (found + * in cache), only sets uid_name and gid + * fields, and returns that. + */ +static HTAAProt *HTAAProt_new(const char *cur_docname, + const char *prot_filename, + const char *ids) +{ + HTList *cur = prot_cache; + HTAAProtCache *cache_item = NULL; + HTAAProt *prot; + FILE *fp; + + if (!prot_cache) + prot_cache = HTList_new(); + + while (NULL != (cache_item = (HTAAProtCache *) HTList_nextObject(cur))) { + if (!strcmp(cache_item->prot_filename, prot_filename)) + break; + } + if (cache_item) { + prot = cache_item->prot; + CTRACE((tfp, "%s `%s' already in cache\n", + "HTAAProt_new: Protection file", prot_filename)); + } else { + CTRACE((tfp, "HTAAProt_new: Loading protection file `%s'\n", + prot_filename)); + + if ((prot = typecalloc(HTAAProt)) == 0) + outofmem(__FILE__, "HTAAProt_new"); + + assert(prot != NULL); + + prot->ctemplate = NULL; + prot->filename = NULL; + prot->uid_name = NULL; + prot->gid_name = NULL; + prot->valid_schemes = HTList_new(); + prot->mask_group = NULL; /* Masking disabled by defaults */ + prot->values = HTAssocList_new(); + + if (prot_filename && NULL != (fp = fopen(prot_filename, TXT_R))) { + HTAA_parseProtFile(prot, fp); + fclose(fp); + if ((cache_item = typecalloc(HTAAProtCache)) == 0) + outofmem(__FILE__, "HTAAProt_new"); + + assert(cache_item != NULL); + + cache_item->prot = prot; + cache_item->prot_filename = NULL; + StrAllocCopy(cache_item->prot_filename, prot_filename); + HTList_addObject(prot_cache, (void *) cache_item); + } else { + CTRACE((tfp, "HTAAProt_new: %s `%s'\n", + "Unable to open protection setup file", + NONNULL(prot_filename))); + } + } + + if (cur_docname) + StrAllocCopy(prot->filename, cur_docname); + HTAA_setIds(prot, ids); + + return prot; +} + +/* PUBLIC HTAA_setDefaultProtection() + * SET THE DEFAULT PROTECTION MODE + * (called by rule system when a + * "defprot" rule is matched) + * ON ENTRY: + * cur_docname is the current result of rule translations. 
+ * prot_filename is the protection setup file (second argument + * for "defprot" rule, optional) + * ids contains user and group names separated by + * a dot, corresponding to the uid + * gid under which the server should run, + * default is "nobody.nogroup" (third argument + * for "defprot" rule, optional; can be given + * only if protection setup file is also given). + * + * ON EXIT: + * returns nothing. + * Sets the module-wide variable default_prot. + */ +void HTAA_setDefaultProtection(const char *cur_docname, + const char *prot_filename, + const char *ids) +{ + default_prot = NULL; /* Not free()'d because this is in cache */ + + if (prot_filename) { + default_prot = HTAAProt_new(cur_docname, prot_filename, ids); + } else { + CTRACE((tfp, "%s %s\n", + "HTAA_setDefaultProtection: ERROR: Protection file", + "not specified (obligatory for DefProt rule)!!\n")); + } +} + +/* PUBLIC HTAA_setCurrentProtection() + * SET THE CURRENT PROTECTION MODE + * (called by rule system when a + * "protect" rule is matched) + * ON ENTRY: + * cur_docname is the current result of rule translations. + * prot_filename is the protection setup file (second argument + * for "protect" rule, optional) + * ids contains user and group names separated by + * a dot, corresponding to the uid + * gid under which the server should run, + * default is "nobody.nogroup" (third argument + * for "protect" rule, optional; can be given + * only if protection setup file is also given). + * + * ON EXIT: + * returns nothing. + * Sets the module-wide variable current_prot. 
+ */ +void HTAA_setCurrentProtection(const char *cur_docname, + const char *prot_filename, + const char *ids) +{ + current_prot = NULL; /* Not free()'d because this is in cache */ + + if (prot_filename) { + current_prot = HTAAProt_new(cur_docname, prot_filename, ids); + } else { + if (default_prot) { + current_prot = default_prot; + HTAA_setIds(current_prot, ids); + CTRACE((tfp, "%s %s %s\n", + "HTAA_setCurrentProtection: Protection file", + "not specified for Protect rule", + "-- using default protection")); + } else { + CTRACE((tfp, "%s %s %s\n", + "HTAA_setCurrentProtection: ERROR: Protection", + "file not specified for Protect rule, and", + "default protection is not set!!")); + } + } +} + +/* PUBLIC HTAA_getCurrentProtection() + * GET CURRENT PROTECTION SETUP STRUCTURE + * (this is set up by callbacks made from + * the rule system when matching "protect" + * (and "defprot") rules) + * ON ENTRY: + * HTTranslate() must have been called before calling + * this function. + * + * ON EXIT: + * returns a HTAAProt structure representing the + * protection setup of the HTTranslate()'d file. + * This must not be free()'d. + */ +HTAAProt *HTAA_getCurrentProtection(void) +{ + return current_prot; +} + +/* PUBLIC HTAA_getDefaultProtection() + * GET DEFAULT PROTECTION SETUP STRUCTURE + * AND SET IT TO CURRENT PROTECTION + * (this is set up by callbacks made from + * the rule system when matching "defprot" + * rules) + * ON ENTRY: + * HTTranslate() must have been called before calling + * this function. + * + * ON EXIT: + * returns a HTAAProt structure representing the + * default protection setup of the HTTranslate()'d + * file (if HTAA_getCurrentProtection() returned + * NULL, i.e., if there is no "protect" rule + * but ACL exists, and we need to know default + * protection settings). + * This must not be free()'d. + * IMPORTANT: + * As a side-effect this tells the protection system that + * the file is in fact protected and sets the current + * protection mode to default. 
+ */ +HTAAProt *HTAA_getDefaultProtection(void) +{ + if (!current_prot) { + current_prot = default_prot; + default_prot = NULL; + } + return current_prot; +} + +/* SERVER INTERNAL HTAA_clearProtections() + * CLEAR DOCUMENT PROTECTION MODE + * (ALSO DEFAULT PROTECTION) + * (called by the rule system) + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns nothing. + * Frees the memory used by protection information. + */ +void HTAA_clearProtections(void) +{ + current_prot = NULL; /* These are not freed because */ + default_prot = NULL; /* they are actually in cache. */ +} + +typedef struct { + char *name; + int user; +} USER_DATA; + +#ifndef NOUSERS +static HTList *known_grp = NULL; +static HTList *known_pwd = NULL; +static BOOL uidgid_cache_inited = NO; +#endif + +#ifdef LY_FIND_LEAKS +static void clear_uidgid_cache(void) +{ +#ifndef NOUSERS + USER_DATA *data; + + if (known_grp) { + while ((data = HTList_removeLastObject(known_grp)) != NULL) { + FREE(data->name); + FREE(data); + } + FREE(known_grp); + } + if (known_pwd) { + while ((data = HTList_removeLastObject(known_pwd)) != NULL) { + FREE(data->name); + FREE(data); + } + FREE(known_pwd); + } +#endif +} +#endif /* LY_FIND_LEAKS */ + +#ifndef NOUSERS +static void save_gid_info(const char *name, int user) +{ + USER_DATA *data = typecalloc(USER_DATA); + + if (!data) + return; + if (!known_grp) { + known_grp = HTList_new(); + if (!uidgid_cache_inited) { +#ifdef LY_FIND_LEAKS + atexit(clear_uidgid_cache); +#endif + uidgid_cache_inited = YES; + } + } + StrAllocCopy(data->name, name); + data->user = user; + HTList_addObject(known_grp, data); +} +#endif /* NOUSERS */ + +#ifndef NOUSERS +static void save_uid_info(const char *name, int user) +{ + USER_DATA *data = typecalloc(USER_DATA); + + if (!data) + return; + if (!known_pwd) { + known_pwd = HTList_new(); + if (!uidgid_cache_inited) { +#ifdef LY_FIND_LEAKS + atexit(clear_uidgid_cache); +#endif + uidgid_cache_inited = YES; + } + } + StrAllocCopy(data->name, name); + 
data->user = user; + HTList_addObject(known_pwd, data); +} +#endif /* !NOUSERS */ + +/* PUBLIC HTAA_UidToName + * GET THE USER NAME + * ON ENTRY: + * The user-id + * + * ON EXIT: + * returns the user name, or an empty string if not found. + */ +const char *HTAA_UidToName(int uid GCC_UNUSED) +{ +#ifndef NOUSERS + struct passwd *pw; + HTList *me = known_pwd; + + while (HTList_nextObject(me)) { + USER_DATA *data = (USER_DATA *) (me->object); + + if (uid == data->user) + return data->name; + } + + if ((pw = getpwuid((uid_t) uid)) != 0 + && pw->pw_name != 0) { + CTRACE((tfp, "%s(%d) returned (%s:%d:...)\n", + "HTAA_UidToName: getpwuid", + uid, + pw->pw_name, (int) pw->pw_uid)); + save_uid_info(pw->pw_name, (int) pw->pw_uid); + return pw->pw_name; + } +#endif + return ""; +} + +/* PUBLIC HTAA_NameToUid + * GET THE USER ID + * ON ENTRY: + * The user-name + * + * ON EXIT: + * returns the user id, or NONESUCH if not found. + */ +int HTAA_NameToUid(const char *name GCC_UNUSED) +{ +#ifndef NOUSERS + struct passwd *pw; + HTList *me = known_pwd; + + while (HTList_nextObject(me)) { + USER_DATA *data = (USER_DATA *) (me->object); + + if (!strcmp(name, data->name)) + return data->user; + } + + if ((pw = getpwnam(name)) != 0) { + CTRACE((tfp, "%s(%s) returned (%s:%d:...)\n", + "HTAA_NameToUid: getpwnam", + name, + pw->pw_name, (int) pw->pw_uid)); + save_uid_info(pw->pw_name, (int) pw->pw_uid); + return (int) pw->pw_uid; + } +#endif + return NONESUCH; +} + +/* PUBLIC HTAA_GidToName + * GET THE GROUP NAME + * ON ENTRY: + * The group-id + * + * ON EXIT: + * returns the group name, or an empty string if not found. 
+ */ +const char *HTAA_GidToName(int gid GCC_UNUSED) +{ +#ifndef NOUSERS + struct group *gr; + HTList *me = known_grp; + + while (HTList_nextObject(me)) { + USER_DATA *data = (USER_DATA *) (me->object); + + if (gid == data->user) + return data->name; + } + + if ((gr = getgrgid((gid_t) gid)) != 0 + && gr->gr_name != 0) { + CTRACE((tfp, "%s(%d) returned (%s:%d:...)\n", + "HTAA_GidToName: getgrgid", + gid, + gr->gr_name, (int) gr->gr_gid)); + save_gid_info(gr->gr_name, (int) gr->gr_gid); + return gr->gr_name; + } +#endif + return ""; +} + +/* PUBLIC HTAA_NameToGid + * GET THE GROUP ID + * ON ENTRY: + * The group-name + * + * ON EXIT: + * returns the group id, or NONESUCH if not found. + */ +int HTAA_NameToGid(const char *name GCC_UNUSED) +{ +#ifndef NOUSERS + struct group *gr; + HTList *me = known_grp; + + while (HTList_nextObject(me)) { + USER_DATA *data = (USER_DATA *) (me->object); + + if (!strcmp(name, data->name)) + return data->user; + } + + if ((gr = getgrnam(name)) != 0) { + CTRACE((tfp, "%s(%s) returned (%s:%d:...)\n", + "HTAA_NameToGid: getgrnam", + name, + gr->gr_name, (int) gr->gr_gid)); + save_gid_info(gr->gr_name, (int) gr->gr_gid); + return (int) gr->gr_gid; + } +#endif + return NONESUCH; +} diff --git a/WWW/Library/Implementation/HTAAProt.h b/WWW/Library/Implementation/HTAAProt.h new file mode 100644 index 00000000..22e3d928 --- /dev/null +++ b/WWW/Library/Implementation/HTAAProt.h @@ -0,0 +1,226 @@ +/* PROTECTION SETUP FILE + + */ + +#ifndef HTAAPROT_H +#define HTAAPROT_H + +#include <HTGroup.h> +#include <HTAssoc.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + +Server's Representation of Document (Tree) Protections + + */ typedef struct { + char *ctemplate; /* Template for this protection */ + char *filename; /* Current document file */ + char *uid_name; /* Effective uid (name of it) */ + char *gid_name; /* Effective gid (name of it) */ + GroupDef *mask_group; /* Allowed users and IP addresses */ + HTList *valid_schemes; /* Valid authentication 
schemes */ + HTAssocList *values; /* Association list for scheme specific */ + /* parameters. */ + } HTAAProt; + +/* + +Callbacks for rule system + + The following three functioncs are called by the rule system: + + HTAA_clearProtections() when starting to translate a filename + + HTAA_setDefaultProtection() when "defprot" rule is matched + + HTAA_setCurrentProtection() when "protect" rule is matched + + Protection setup files are cached by these functions. + + */ + +/* PUBLIC HTAA_setDefaultProtection() + * SET THE DEFAULT PROTECTION MODE + * (called by rule system when a + * "defprot" rule is matched) + * ON ENTRY: + * cur_docname is the current result of rule translations. + * prot_filename is the protection setup file (second argument + * for "defprot" rule, optional) + * eff_ids contains user and group names separated by + * a dot, corresponding to the effective uid + * gid under which the server should run, + * default is "nobody.nogroup" (third argument + * for "defprot" rule, optional; can be given + * only if protection setup file is also given). + * + * ON EXIT: + * returns nothing. + * Sets the module-wide variable default_prot. + */ + extern void HTAA_setDefaultProtection(const char *cur_docname, + const char *prot_filename, + const char *eff_ids); + +/* PUBLIC HTAA_setCurrentProtection() + * SET THE CURRENT PROTECTION MODE + * (called by rule system when a + * "protect" rule is matched) + * ON ENTRY: + * cur_docname is the current result of rule translations. + * prot_filename is the protection setup file (second argument + * for "protect" rule, optional) + * eff_ids contains user and group names separated by + * a dot, corresponding to the effective uid + * gid under which the server should run, + * default is "nobody.nogroup" (third argument + * for "protect" rule, optional; can be given + * only if protection setup file is also given). + * + * ON EXIT: + * returns nothing. + * Sets the module-wide variable current_prot. 
+ */ + extern void HTAA_setCurrentProtection(const char *cur_docname, + const char *prot_filename, + const char *eff_ids); + +/* SERVER INTERNAL HTAA_clearProtections() + * CLEAR DOCUMENT PROTECTION MODE + * (ALSO DEFAULT PROTECTION) + * (called by the rule system) + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns nothing. + * Frees the memory used by protection information. + */ + extern void HTAA_clearProtections(void); + +/* + +Getting Protection Settings + + HTAA_getCurrentProtection() returns the current protection mode (if there was a + "protect" rule). NULL, if no "protect" rule has been matched. + + HTAA_getDefaultProtection() sets the current protection mode to what it was set to + by "defprot" rule and also returns it (therefore after this call also + HTAA_getCurrentProtection() returns the same structure. + + */ + +/* PUBLIC HTAA_getCurrentProtection() + * GET CURRENT PROTECTION SETUP STRUCTURE + * (this is set up by callbacks made from + * the rule system when matching "protect" + * (and "defprot") rules) + * ON ENTRY: + * HTTranslate() must have been called before calling + * this function. + * + * ON EXIT: + * returns a HTAAProt structure representing the + * protection setup of the HTTranslate()'d file. + * This must not be free()'d. + */ + extern HTAAProt *HTAA_getCurrentProtection(void); + +/* PUBLIC HTAA_getDefaultProtection() + * GET DEFAULT PROTECTION SETUP STRUCTURE + * (this is set up by callbacks made from + * the rule system when matching "defprot" + * rules) + * ON ENTRY: + * HTTranslate() must have been called before calling + * this function. + * + * ON EXIT: + * returns a HTAAProt structure representing the + * default protection setup of the HTTranslate()'d + * file (if HTAA_getCurrentProtection() returned + * NULL, i.e., if there is no "protect" rule + * but ACL exists, and we need to know default + * protection settings). + * This must not be free()'d. 
+ */ + extern HTAAProt *HTAA_getDefaultProtection(void); + +/* + +Get User and Group IDs to Which Set to + + */ + +#ifndef NOUSERS +/* PUBLIC HTAA_getUid() + * GET THE USER ID TO CHANGE THE PROCESS UID TO + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns the uid number to give to setuid() system call. + * Default is 65534 (nobody). + */ + extern int HTAA_getUid(void); + +/* PUBLIC HTAA_getGid() + * GET THE GROUP ID TO CHANGE THE PROCESS GID TO + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns the uid number to give to setgid() system call. + * Default is 65534 (nogroup). + */ + extern int HTAA_getGid(void); +#endif /* !NOUSERS */ + +/* PUBLIC HTAA_UidToName + * GET THE USER NAME + * ON ENTRY: + * The user-id + * + * ON EXIT: + * returns the user name + */ + extern const char *HTAA_UidToName(int uid); + +/* PUBLIC HTAA_NameToUid + * GET THE USER ID + * ON ENTRY: + * The user-name + * + * ON EXIT: + * returns the user id + */ + extern int HTAA_NameToUid(const char *name); + +/* PUBLIC HTAA_GidToName + * GET THE GROUP NAME + * ON ENTRY: + * The group-id + * + * ON EXIT: + * returns the group name + */ + extern const char *HTAA_GidToName(int gid); + +/* PUBLIC HTAA_NameToGid + * GET THE GROUP ID + * ON ENTRY: + * The group-name + * + * ON EXIT: + * returns the group id + */ + extern int HTAA_NameToGid(const char *name); + +#ifdef __cplusplus +} +#endif +#endif /* not HTAAPROT_H */ diff --git a/WWW/Library/Implementation/HTAAUtil.c b/WWW/Library/Implementation/HTAAUtil.c new file mode 100644 index 00000000..8a6caab6 --- /dev/null +++ b/WWW/Library/Implementation/HTAAUtil.c @@ -0,0 +1,606 @@ +/* + * $LynxId: HTAAUtil.c,v 1.31 2010/09/24 23:51:22 tom Exp $ + * + * MODULE HTAAUtil.c + * COMMON PARTS OF ACCESS AUTHORIZATION MODULE + * FOR BOTH SERVER AND BROWSER + * + * IMPORTANT: + * Routines in this module use dynamic allocation, but free + * automatically all the memory reserved by them. 
+ * + * Therefore the caller never has to (and never should) + * free() any object returned by these functions. + * + * Therefore also all the strings returned by this package + * are only valid until the next call to the same function + * is made. This approach is selected, because of the nature + * of access authorization: no string returned by the package + * needs to be valid longer than until the next call. + * + * This also makes it easy to plug the AA package in: + * you don't have to ponder whether to free() something + * here or is it done somewhere else (because it is always + * done somewhere else). + * + * The strings that the package needs to store are copied + * so the original strings given as parameters to AA + * functions may be freed or modified with no side effects. + * + * The AA package does not free() anything else than what + * it has itself allocated. + * + * AA (Access Authorization) package means modules which + * names start with HTAA. + * + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * MD Mark Donszelmann duns@vxdeop.cern.ch + * + * HISTORY: + * 8 Nov 93 MD (VMS only) Added case insensitive comparison in HTAA_templateCaseMatch + * + * + * BUGS: + * + * + */ + +#include <HTUtils.h> + +#include <HTAAUtil.h> /* Implemented here */ +#include <HTAssoc.h> /* Assoc list */ +#include <HTTCP.h> +#include <HTTP.h> + +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYLeaks.h> + +/* PUBLIC HTAAScheme_enum() + * TRANSLATE SCHEME NAME INTO + * A SCHEME ENUMERATION + * + * ON ENTRY: + * name is a string representing the scheme name. + * + * ON EXIT: + * returns the enumerated constant for that scheme. 
+ */ +HTAAScheme HTAAScheme_enum(const char *name) +{ + char *upcased = NULL; + + if (!name) + return HTAA_UNKNOWN; + + StrAllocCopy(upcased, name); + LYUpperCase(upcased); + + if (!StrNCmp(upcased, "NONE", 4)) { + FREE(upcased); + return HTAA_NONE; + } else if (!StrNCmp(upcased, "BASIC", 5)) { + FREE(upcased); + return HTAA_BASIC; + } else if (!StrNCmp(upcased, "PUBKEY", 6)) { + FREE(upcased); + return HTAA_PUBKEY; + } else if (!StrNCmp(upcased, "KERBEROSV4", 10)) { + FREE(upcased); + return HTAA_KERBEROS_V4; + } else if (!StrNCmp(upcased, "KERBEROSV5", 10)) { + FREE(upcased); + return HTAA_KERBEROS_V5; + } else { + FREE(upcased); + return HTAA_UNKNOWN; + } +} + +/* PUBLIC HTAAScheme_name() + * GET THE NAME OF A GIVEN SCHEME + * ON ENTRY: + * scheme is one of the scheme enum values: + * HTAA_NONE, HTAA_BASIC, HTAA_PUBKEY, ... + * + * ON EXIT: + * returns the name of the scheme, i.e. + * "None", "Basic", "Pubkey", ... + */ +const char *HTAAScheme_name(HTAAScheme scheme) +{ + switch (scheme) { + case HTAA_NONE: + return "None"; + case HTAA_BASIC: + return "Basic"; + case HTAA_PUBKEY: + return "Pubkey"; + case HTAA_KERBEROS_V4: + return "KerberosV4"; + case HTAA_KERBEROS_V5: + return "KerberosV5"; + case HTAA_UNKNOWN: + return "UNKNOWN"; + default: + return "THIS-IS-A-BUG"; + } +} + +/* PUBLIC HTAAMethod_enum() + * TRANSLATE METHOD NAME INTO AN ENUMERATED VALUE + * ON ENTRY: + * name is the method name to translate. + * + * ON EXIT: + * returns HTAAMethod enumerated value corresponding + * to the given name. + */ +HTAAMethod HTAAMethod_enum(const char *name) +{ + if (!name) + return METHOD_UNKNOWN; + + if (0 == strcasecomp(name, "GET")) + return METHOD_GET; + else if (0 == strcasecomp(name, "PUT")) + return METHOD_PUT; + else + return METHOD_UNKNOWN; +} + +/* PUBLIC HTAAMethod_name() + * GET THE NAME OF A GIVEN METHOD + * ON ENTRY: + * method is one of the method enum values: + * METHOD_GET, METHOD_PUT, ... + * + * ON EXIT: + * returns the name of the scheme, i.e. 
+ * "GET", "PUT", ... + */ +const char *HTAAMethod_name(HTAAMethod method) +{ + switch (method) { + case METHOD_GET: + return "GET"; + case METHOD_PUT: + return "PUT"; + case METHOD_UNKNOWN: + return "UNKNOWN"; + default: + return "THIS-IS-A-BUG"; + } +} + +/* PUBLIC HTAAMethod_inList() + * IS A METHOD IN A LIST OF METHOD NAMES + * ON ENTRY: + * method is the method to look for. + * list is a list of method names. + * + * ON EXIT: + * returns YES, if method was found. + * NO, if not found. + */ +BOOL HTAAMethod_inList(HTAAMethod method, HTList *list) +{ + HTList *cur = list; + char *item; + + while (NULL != (item = (char *) HTList_nextObject(cur))) { + CTRACE((tfp, " %s", item)); + if (method == HTAAMethod_enum(item)) + return YES; + } + + return NO; /* Not found */ +} + +/* PUBLIC HTAA_templateMatch() + * STRING COMPARISON FUNCTION FOR FILE NAMES + * WITH ONE WILDCARD * IN THE TEMPLATE + * NOTE: + * This is essentially the same code as in HTRules.c, but it + * cannot be used because it is embedded in between other code. + * (In fact, HTRules.c should use this routine, but then this + * routine would have to be more sophisticated... why is life + * sometimes so hard...) + * + * ON ENTRY: + * ctemplate is a template string to match the file name + * against, may contain a single wildcard + * character * which matches zero or more + * arbitrary characters. + * filename is the filename (or pathname) to be matched + * against the template. + * + * ON EXIT: + * returns YES, if filename matches the template. + * NO, otherwise. 
+ */ +BOOL HTAA_templateMatch(const char *ctemplate, + const char *filename) +{ + const char *p = ctemplate; + const char *q = filename; + int m; + + for (; *p && *q && *p == *q; p++, q++) /* Find first mismatch */ + ; /* do nothing else */ + + if (!*p && !*q) + return YES; /* Equally long equal strings */ + else if ('*' == *p) { /* Wildcard */ + p++; /* Skip wildcard character */ + m = (int) (strlen(q) - strlen(p)); /* Amount to match to wildcard */ + if (m < 0) + return NO; /* No match, filename too short */ + else { /* Skip the matched characters and compare */ + if (strcmp(p, q + m)) + return NO; /* Tail mismatch */ + else + return YES; /* Tail match */ + } + /* if wildcard */ + } else + return NO; /* Length or character mismatch */ +} + +/* PUBLIC HTAA_templateCaseMatch() + * STRING COMPARISON FUNCTION FOR FILE NAMES + * WITH ONE WILDCARD * IN THE TEMPLATE (Case Insensitive) + * NOTE: + * This is essentially the same code as in HTAA_templateMatch, but + * it compares case insensitive (for VMS). Reason for this routine + * is that HTAA_templateMatch gets called from several places, also + * there where a case sensitive match is needed, so one cannot just + * change the HTAA_templateMatch routine for VMS. + * + * ON ENTRY: + * template is a template string to match the file name + * against, may contain a single wildcard + * character * which matches zero or more + * arbitrary characters. + * filename is the filename (or pathname) to be matched + * against the template. + * + * ON EXIT: + * returns YES, if filename matches the template. + * NO, otherwise. 
+ */ +BOOL HTAA_templateCaseMatch(const char *ctemplate, + const char *filename) +{ + const char *p = ctemplate; + const char *q = filename; + int m; + + /* Find first mismatch */ + for (; *p && *q && TOUPPER(*p) == TOUPPER(*q); p++, q++) ; /* do nothing else */ + + if (!*p && !*q) + return YES; /* Equally long equal strings */ + else if ('*' == *p) { /* Wildcard */ + p++; /* Skip wildcard character */ + m = (int) (strlen(q) - strlen(p)); /* Amount to match to wildcard */ + if (m < 0) + return NO; /* No match, filename too short */ + else { /* Skip the matched characters and compare */ + if (strcasecomp(p, q + m)) + return NO; /* Tail mismatch */ + else + return YES; /* Tail match */ + } + /* if wildcard */ + } else + return NO; /* Length or character mismatch */ +} + +/* PUBLIC HTAA_makeProtectionTemplate() + * CREATE A PROTECTION TEMPLATE FOR THE FILES + * IN THE SAME DIRECTORY AS THE GIVEN FILE + * (Used by server if there is no fancier way for + * it to tell the client, and by browser if server + * didn't send WWW-ProtectionTemplate: field) + * ON ENTRY: + * docname is the document pathname (from URL). + * + * ON EXIT: + * returns a template matching docname, and other files + * files in that directory. + * + * E.g. /foo/bar/x.html => /foo/bar/ * + * ^ + * Space only to prevent it from + * being a comment marker here, + * there really isn't any space. 
+ */ +char *HTAA_makeProtectionTemplate(const char *docname) +{ + char *ctemplate = NULL; + char *slash = NULL; + + if (docname) { + StrAllocCopy(ctemplate, docname); + slash = strrchr(ctemplate, '/'); + if (slash) + slash++; + else + slash = ctemplate; + *slash = '\0'; + StrAllocCat(ctemplate, "*"); + } else + StrAllocCopy(ctemplate, "*"); + + CTRACE((tfp, "make_template: made template `%s' for file `%s'\n", + ctemplate, docname)); + + return ctemplate; +} + +/* + * Skip leading whitespace from *s forward + */ +#define SKIPWS(s) while (*s==' ' || *s=='\t') s++; + +/* + * Kill trailing whitespace starting from *(s-1) backwards + */ +#define KILLWS(s) {char *c=s-1; while (*c==' ' || *c=='\t') *(c--)='\0';} + +/* PUBLIC HTAA_parseArgList() + * PARSE AN ARGUMENT LIST GIVEN IN A HEADER FIELD + * ON ENTRY: + * str is a comma-separated list: + * + * item, item, item + * where + * item ::= value + * | name=value + * | name="value" + * + * Leading and trailing whitespace is ignored + * everywhere except inside quotes, so the following + * examples are equal: + * + * name=value,foo=bar + * name="value",foo="bar" + * name = value , foo = bar + * name = "value" , foo = "bar" + * + * ON EXIT: + * returns a list of name-value pairs (actually HTAssocList*). + * For items with no name, just value, the name is + * the number of order number of that item. E.g. + * "1" for the first, etc. 
+ */ +HTAssocList *HTAA_parseArgList(char *str) +{ + HTAssocList *assoc_list = HTAssocList_new(); + char *cur = NULL; + char *name = NULL; + int n = 0; + + if (!str) + return assoc_list; + + while (*str) { + SKIPWS(str); /* Skip leading whitespace */ + cur = str; + n++; + + while (*cur && *cur != '=' && *cur != ',') + cur++; /* Find end of name (or lonely value without a name) */ + KILLWS(cur); /* Kill trailing whitespace */ + + if (*cur == '=') { /* Name followed by a value */ + *(cur++) = '\0'; /* Terminate name */ + StrAllocCopy(name, str); + SKIPWS(cur); /* Skip WS leading the value */ + str = cur; + if (*str == '"') { /* Quoted value */ + str++; + cur = str; + while (*cur && *cur != '"') + cur++; + if (*cur == '"') + *(cur++) = '\0'; /* Terminate value */ + /* else it is lacking terminating quote */ + SKIPWS(cur); /* Skip WS leading comma */ + if (*cur == ',') + cur++; /* Skip separating colon */ + } else { /* Unquoted value */ + while (*cur && *cur != ',') + cur++; + KILLWS(cur); /* Kill trailing whitespace */ + if (*cur == ',') + *(cur++) = '\0'; + /* else *cur already NULL */ + } + } else { /* No name, just a value */ + if (*cur == ',') + *(cur++) = '\0'; /* Terminate value */ + /* else last value on line (already terminated by NULL) */ + HTSprintf0(&name, "%d", n); /* Item order number for name */ + } + HTAssocList_add(assoc_list, name, str); + str = cur; + } /* while *str */ + + FREE(name); + return assoc_list; +} + +/************** HEADER LINE READER -- DOES UNFOLDING *************************/ + +#define BUFFER_SIZE 1024 + +static size_t buffer_length; +static char *buffer = 0; +static char *start_pointer; +static char *end_pointer; +static int in_soc = -1; + +#ifdef LY_FIND_LEAKS +static void FreeHTAAUtil(void) +{ + FREE(buffer); +} +#endif /* LY_FIND_LEAKS */ + +/* PUBLIC HTAA_setupReader() + * SET UP HEADER LINE READER, i.e., give + * the already-read-but-not-yet-processed + * buffer of text to be read before more + * is read from the socket. 
+ * ON ENTRY: + * start_of_headers is a pointer to a buffer containing + * the beginning of the header lines + * (rest will be read from a socket). + * length is the number of valid characters in + * 'start_of_headers' buffer. + * soc is the socket to use when start_of_headers + * buffer is used up. + * ON EXIT: + * returns nothing. + * Subsequent calls to HTAA_getUnfoldedLine() + * will use this buffer first and then + * proceed to read from socket. + */ +void HTAA_setupReader(char *start_of_headers, + size_t length, + int soc) +{ + if (!start_of_headers) + length = 0; /* initialize length (is this reached at all?) */ + if (buffer == NULL) { /* first call? */ + buffer_length = length; + if (buffer_length < BUFFER_SIZE) /* would fall below BUFFER_SIZE? */ + buffer_length = BUFFER_SIZE; + buffer = (char *) malloc((size_t) (sizeof(char) * (buffer_length + 1))); + } else if (length > buffer_length) { /* need more space? */ + buffer_length = length; + buffer = (char *) realloc((char *) buffer, + (size_t) (sizeof(char) * (buffer_length + 1))); + } + if (buffer == NULL) + outofmem(__FILE__, "HTAA_setupReader"); + + assert(buffer != NULL); + +#ifdef LY_FIND_LEAKS + atexit(FreeHTAAUtil); +#endif + start_pointer = buffer; + if (start_of_headers) { + StrNCpy(buffer, start_of_headers, length); + buffer[length] = '\0'; + end_pointer = buffer + length; + } else { + *start_pointer = '\0'; + end_pointer = start_pointer; + } + in_soc = soc; +} + +/* PUBLIC HTAA_getUnfoldedLine() + * READ AN UNFOLDED HEADER LINE FROM SOCKET + * ON ENTRY: + * HTAA_setupReader must absolutely be called before + * this function to set up internal buffer. + * + * ON EXIT: + * returns a newly-allocated character string representing + * the read line. The line is unfolded, i.e. + * lines that begin with whitespace are appended + * to current line. E.g. 
+ * + * Field-Name: Blaa-Blaa + * This-Is-A-Continuation-Line + * Here-Is_Another + * + * is seen by the caller as: + * + * Field-Name: Blaa-Blaa This-Is-A-Continuation-Line Here-Is_Another + * + */ +char *HTAA_getUnfoldedLine(void) +{ + char *line = NULL; + char *cur; + int count; + BOOL peek_for_folding = NO; + + if (in_soc < 0) { + CTRACE((tfp, "%s %s\n", + "HTAA_getUnfoldedLine: buffer not initialized", + "with function HTAA_setupReader()")); + return NULL; + } + + for (;;) { + + /* Reading from socket */ + + if (start_pointer >= end_pointer) { /*Read the next block and continue */ +#ifdef USE_SSL + if (SSL_handle) + count = SSL_read(SSL_handle, buffer, BUFFER_SIZE); + else + count = NETREAD(in_soc, buffer, BUFFER_SIZE); +#else + count = NETREAD(in_soc, buffer, BUFFER_SIZE); +#endif /* USE_SSL */ + if (count <= 0) { + in_soc = -1; + return line; + } + start_pointer = buffer; + end_pointer = buffer + count; + *end_pointer = '\0'; +#ifdef NOT_ASCII + cur = start_pointer; + while (cur < end_pointer) { + *cur = TOASCII(*cur); + cur++; + } +#endif /*NOT_ASCII */ + } + cur = start_pointer; + + /* Unfolding */ + + if (peek_for_folding) { + if (*cur != ' ' && *cur != '\t') + return line; /* Ok, no continuation line */ + else /* So this is a continuation line, continue */ + peek_for_folding = NO; + } + + /* Finding end-of-line */ + + while (cur < end_pointer && *cur != '\n') /* Find the end-of-line */ + cur++; /* (or end-of-buffer). 
*/ + + /* Terminating line */ + + if (cur < end_pointer) { /* So *cur==LF, terminate line */ + *cur = '\0'; /* Overwrite LF */ + if (*(cur - 1) == '\r') + *(cur - 1) = '\0'; /* Overwrite CR */ + peek_for_folding = YES; /* Check for a continuation line */ + } + + /* Copying the result */ + + if (line) + StrAllocCat(line, start_pointer); /* Append */ + else + StrAllocCopy(line, start_pointer); /* A new line */ + + start_pointer = cur + 1; /* Skip the read line */ + + } /* forever */ +} diff --git a/WWW/Library/Implementation/HTAAUtil.h b/WWW/Library/Implementation/HTAAUtil.h new file mode 100644 index 00000000..33a8ee30 --- /dev/null +++ b/WWW/Library/Implementation/HTAAUtil.h @@ -0,0 +1,318 @@ +/* + * $LynxId: HTAAUtil.h,v 1.13 2010/10/27 00:09:52 tom Exp $ + * + * Utilities for the Authorization parts of libwww + * COMMON PARTS OF AUTHORIZATION MODULE TO BOTH SERVER AND BROWSER + * + * This module is the interface to the common parts of Access Authorization (AA) package + * for both server and browser. Important to know about memory allocation: + * + * Routines in this module use dynamic allocation, but free automatically all the memory + * reserved by them. + * + * Therefore the caller never has to (and never should) free() any object returned by + * these functions. + * + * Therefore also all the strings returned by this package are only valid until the next + * call to the same function is made. This approach is selected, because of the nature of + * access authorization: no string returned by the package needs to be valid longer than + * until the next call. + * + * This also makes it easy to plug the AA package in: you don't have to ponder whether to + * free() something here or is it done somewhere else (because it is always done somewhere + * else). + * + * The strings that the package needs to store are copied so the original strings given as + * parameters to AA functions may be freed or modified with no side effects. 
+ * + * Also note: The AA package does not free() anything else than what it has itself + * allocated. + * + */ + +#ifndef HTAAUTIL_H +#define HTAAUTIL_H + +#include <HTList.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Numeric constants + */ +#define MAX_USERNAME_LEN 16 /* @@ Longest allowed username */ +#define MAX_PASSWORD_LEN 3*13 /* @@ Longest allowed password */ + /* (encrypted, so really only 3*8) */ +#define MAX_METHODNAME_LEN 12 /* @@ Longest allowed method name */ +#define MAX_FIELDNAME_LEN 16 /* @@ Longest field name in */ + /* protection setup file */ +#define MAX_PATHNAME_LEN 80 /* @@ Longest passwd/group file */ +/* pathname to allow */ +/* + + Datatype definitions + + HTAASCHEME + + The enumeration HTAAScheme represents the possible authentication schemes used by the + WWW Access Authorization. + + */ typedef enum { + HTAA_UNKNOWN, + HTAA_NONE, + HTAA_BASIC, + HTAA_PUBKEY, + HTAA_KERBEROS_V4, + HTAA_KERBEROS_V5, + HTAA_MAX_SCHEMES /* THIS MUST ALWAYS BE LAST! Number of schemes */ + } HTAAScheme; + +/* + + ENUMERATION TO REPRESENT HTTP METHODS + + */ + + typedef enum { + METHOD_UNKNOWN, + METHOD_GET, + METHOD_PUT + } HTAAMethod; + +/* + +Authentication Schemes + + */ + +/* PUBLIC HTAAScheme_enum() + * TRANSLATE SCHEME NAME TO A SCHEME ENUMERATION + * ON ENTRY: + * name is a string representing the scheme name. + * + * ON EXIT: + * returns the enumerated constant for that scheme. + */ + extern HTAAScheme HTAAScheme_enum(const char *name); + +/* PUBLIC HTAAScheme_name() + * GET THE NAME OF A GIVEN SCHEME + * ON ENTRY: + * scheme is one of the scheme enum values: + * HTAA_NONE, HTAA_BASIC, HTAA_PUBKEY, ... + * + * ON EXIT: + * returns the name of the scheme, i.e. + * "none", "basic", "pubkey", ... + */ + extern const char *HTAAScheme_name(HTAAScheme scheme); + +/* + +Methods + + */ + +/* PUBLIC HTAAMethod_enum() + * TRANSLATE METHOD NAME INTO AN ENUMERATED VALUE + * ON ENTRY: + * name is the method name to translate. 
+ * + * ON EXIT: + * returns HTAAMethod enumerated value corresponding + * to the given name. + */ + extern HTAAMethod HTAAMethod_enum(const char *name); + +/* PUBLIC HTAAMethod_name() + * GET THE NAME OF A GIVEN METHOD + * ON ENTRY: + * method is one of the method enum values: + * METHOD_GET, METHOD_PUT, ... + * + * ON EXIT: + * returns the name of the scheme, i.e. + * "GET", "PUT", ... + */ + extern const char *HTAAMethod_name(HTAAMethod method); + +/* PUBLIC HTAAMethod_inList() + * IS A METHOD IN A LIST OF METHOD NAMES + * ON ENTRY: + * method is the method to look for. + * list is a list of method names. + * + * ON EXIT: + * returns YES, if method was found. + * NO, if not found. + */ + extern BOOL HTAAMethod_inList(HTAAMethod method, HTList *list); + +/* + +Match Template Against Filename + + */ + +/* PUBLIC HTAA_templateMatch() + * STRING COMPARISON FUNCTION FOR FILE NAMES + * WITH ONE WILDCARD * IN THE TEMPLATE + * NOTE: + * This is essentially the same code as in HTRules.c, but it + * cannot be used because it is embedded in between other code. + * (In fact, HTRules.c should use this routine, but then this + * routine would have to be more sophisticated... why is life + * sometimes so hard...) + * + * ON ENTRY: + * ctemplate is a template string to match the file name + * against, may contain a single wildcard + * character * which matches zero or more + * arbitrary characters. + * filename is the filename (or pathname) to be matched + * against the template. + * + * ON EXIT: + * returns YES, if filename matches the template. + * NO, otherwise. + */ + extern BOOL HTAA_templateMatch(const char *ctemplate, + const char *filename); + +/* PUBLIC HTAA_templateCaseMatch() + * STRING COMPARISON FUNCTION FOR FILE NAMES + * WITH ONE WILDCARD * IN THE TEMPLATE (Case Insensitive) + * NOTE: + * This is essentially the same code as in HTAA_templateMatch, but + * it compares case insensitive (for VMS). 
Reason for this routine + * is that HTAA_templateMatch gets called from several places, also + * there where a case sensitive match is needed, so one cannot just + * change the HTAA_templateMatch routine for VMS. + * + * ON ENTRY: + * ctemplate is a template string to match the file name + * against, may contain a single wildcard + * character * which matches zero or more + * arbitrary characters. + * filename is the filename (or pathname) to be matched + * against the template. + * + * ON EXIT: + * returns YES, if filename matches the template. + * NO, otherwise. + */ + extern BOOL HTAA_templateCaseMatch(const char *ctemplate, + const char *filename); + +/* PUBLIC HTAA_makeProtectionTemplate() + * CREATE A PROTECTION TEMPLATE FOR THE FILES + * IN THE SAME DIRECTORY AS THE GIVEN FILE + * (Used by server if there is no fancier way for + * it to tell the client, and by browser if server + * didn't send WWW-ProtectionTemplate: field) + * ON ENTRY: + * docname is the document pathname (from URL). + * + * ON EXIT: + * returns a template matching docname, and other files + * files in that directory. + * + * E.g. /foo/bar/x.html => /foo/bar/ * + * ^ + * Space only to prevent it from + * being a comment marker here, + * there really isn't any space. + */ + extern char *HTAA_makeProtectionTemplate(const char *docname); + +/* + +MIME Argument List Parser + + */ + +/* PUBLIC HTAA_parseArgList() + * PARSE AN ARGUMENT LIST GIVEN IN A HEADER FIELD + * ON ENTRY: + * str is a comma-separated list: + * + * item, item, item + * where + * item ::= value + * | name=value + * | name="value" + * + * Leading and trailing whitespace is ignored + * everywhere except inside quotes, so the following + * examples are equal: + * + * name=value,foo=bar + * name="value",foo="bar" + * name = value , foo = bar + * name = "value" , foo = "bar" + * + * ON EXIT: + * returns a list of name-value pairs (actually HTAssocList*). 
+ * For items with no name, just value, the name is + * the number of order number of that item. E.g. + * "1" for the first, etc. + */ + extern HTList *HTAA_parseArgList(char *str); + +/* + +Header Line Reader + + */ + +/* PUBLIC HTAA_setupReader() + * SET UP HEADER LINE READER, i.e., give + * the already-read-but-not-yet-processed + * buffer of text to be read before more + * is read from the socket. + * ON ENTRY: + * start_of_headers is a pointer to a buffer containing + * the beginning of the header lines + * (rest will be read from a socket). + * length is the number of valid characters in + * 'start_of_headers' buffer. + * soc is the socket to use when start_of_headers + * buffer is used up. + * ON EXIT: + * returns nothing. + * Subsequent calls to HTAA_getUnfoldedLine() + * will use this buffer first and then + * proceed to read from socket. + */ + extern void HTAA_setupReader(char *start_of_headers, + size_t length, + int soc); + +/* PUBLIC HTAA_getUnfoldedLine() + * READ AN UNFOLDED HEADER LINE FROM SOCKET + * ON ENTRY: + * HTAA_setupReader must absolutely be called before + * this function to set up internal buffer. + * + * ON EXIT: + * returns a newly-allocated character string representing + * the read line. The line is unfolded, i.e. + * lines that begin with whitespace are appended + * to current line. E.g. 
+ * + * Field-Name: Blaa-Blaa + * This-Is-A-Continuation-Line + * Here-Is_Another + * + * is seen by the caller as: + * + * Field-Name: Blaa-Blaa This-Is-A-Continuation-Line Here-Is_Another + * + */ + extern char *HTAA_getUnfoldedLine(void); + +#ifdef __cplusplus +} +#endif +#endif /* NOT HTAAUTIL_H */ diff --git a/WWW/Library/Implementation/HTAccess.c b/WWW/Library/Implementation/HTAccess.c new file mode 100644 index 00000000..2b287e9d --- /dev/null +++ b/WWW/Library/Implementation/HTAccess.c @@ -0,0 +1,1447 @@ +/* + * $LynxId: HTAccess.c,v 1.76 2012/02/04 00:15:53 tom Exp $ + * + * Access Manager HTAccess.c + * ============== + * + * Authors + * TBL Tim Berners-Lee timbl@info.cern.ch + * JFG Jean-Francois Groff jfg@dxcern.cern.ch + * DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu> + * FM Foteos Macrides macrides@sci.wfeb.edu + * PDM Danny Mayer mayer@ljo.dec.com + * + * History + * 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL + * 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG + * 6 Oct 92 Moved HTClientHost and logfile into here. TBL + * 17 Dec 92 Tn3270 added, bug fix. DD + * 4 Feb 93 Access registration, Search escapes bad chars TBL + * PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED + * 28 May 93 WAIS gateway explicit if no WAIS library linked in. + * 31 May 94 Added DIRECT_WAIS support for VMS. FM + * 27 Jan 95 Fixed proxy support to use NNTPSERVER for checking + * whether or not to use the proxy server. PDM + * 27 Jan 95 Ensured that proxy service will be overridden for files + * on the local host (because HTLoadFile() doesn't try ftp + * for those) and will substitute ftp for remote files. FM + * 28 Jan 95 Tweaked PDM's proxy override mods to handle port info + * for news and wais URL's. FM + * + * Bugs + * This module assumes that that the graphic object is hypertext, as it + * needs to select it when it has been loaded. A superclass needs to be + * defined which accepts select and select_anchor. 
+ */ + +#ifdef VMS +#define DIRECT_WAIS +#endif /* VMS */ + +#include <HTUtils.h> +#include <HTTP.h> +#include <HTAlert.h> +/* + * Implements: + */ +#include <HTAccess.h> + +/* + * Uses: + */ +#include <HTParse.h> +#include <HTML.h> /* SCW */ + +#ifndef NO_RULES +#include <HTRules.h> +#endif + +#include <HTList.h> +#include <HText.h> /* See bugs above */ +#include <HTCJK.h> +#include <UCMap.h> +#include <GridText.h> + +#include <LYGlobalDefs.h> +#include <LYexit.h> +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYLeaks.h> + +/* + * These flags may be set to modify the operation of this module + */ +char *HTClientHost = NULL; /* Name of remote login host if any */ +FILE *HTlogfile = NULL; /* File to which to output one-liners */ +BOOL HTSecure = NO; /* Disable access for telnet users? */ +BOOL HTPermitRedir = NO; /* Always allow redirection in getfile()? */ + +BOOL using_proxy = NO; /* are we using a proxy gateway? */ + +/* + * To generate other things, play with these: + */ +HTFormat HTOutputFormat = NULL; +HTStream *HTOutputStream = NULL; /* For non-interactive, set this */ + +static HTList *protocols = NULL; /* List of registered protocol descriptors */ + +char *use_this_url_instead = NULL; + +static int pushed_assume_LYhndl = -1; /* see LYUC* functions below - kw */ +static char *pushed_assume_MIMEname = NULL; + +#ifdef LY_FIND_LEAKS +static void free_protocols(void) +{ + HTList_delete(protocols); + protocols = NULL; + FREE(pushed_assume_MIMEname); /* shouldn't happen, just in case - kw */ +} +#endif /* LY_FIND_LEAKS */ + +/* Register a Protocol. HTRegisterProtocol() + * -------------------- + */ +BOOL HTRegisterProtocol(HTProtocol * protocol) +{ + if (!protocols) { + protocols = HTList_new(); +#ifdef LY_FIND_LEAKS + atexit(free_protocols); +#endif + } + HTList_addObject(protocols, protocol); + return YES; +} + +/* Register all known protocols. 
HTAccessInit() + * ----------------------------- + * + * Add to or subtract from this list if you add or remove protocol + * modules. This routine is called the first time the protocol list + * is needed, unless any protocols are already registered, in which + * case it is not called. Therefore the application can override + * this list. + * + * Compiling with NO_INIT prevents all known protocols from being + * forced in at link time. + */ +#ifndef NO_INIT +#ifdef GLOBALREF_IS_MACRO +extern GLOBALREF (HTProtocol, HTTP); +extern GLOBALREF (HTProtocol, HTTPS); +extern GLOBALREF (HTProtocol, HTFile); +extern GLOBALREF (HTProtocol, HTTelnet); +extern GLOBALREF (HTProtocol, HTTn3270); +extern GLOBALREF (HTProtocol, HTRlogin); + +#ifndef DECNET +#ifndef DISABLE_FTP +extern GLOBALREF (HTProtocol, HTFTP); +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS +extern GLOBALREF (HTProtocol, HTNews); +extern GLOBALREF (HTProtocol, HTNNTP); +extern GLOBALREF (HTProtocol, HTNewsPost); +extern GLOBALREF (HTProtocol, HTNewsReply); +extern GLOBALREF (HTProtocol, HTSNews); +extern GLOBALREF (HTProtocol, HTSNewsPost); +extern GLOBALREF (HTProtocol, HTSNewsReply); +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER +extern GLOBALREF (HTProtocol, HTGopher); +extern GLOBALREF (HTProtocol, HTCSO); +#endif /* not DISABLE_GOPHER */ +#ifndef DISABLE_FINGER +extern GLOBALREF (HTProtocol, HTFinger); +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS +extern GLOBALREF (HTProtocol, HTWAIS); +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ +#else +GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin; + +#ifndef DECNET +#ifndef DISABLE_FTP +GLOBALREF HTProtocol HTFTP; +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS +GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply; +GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply; +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER +GLOBALREF HTProtocol HTGopher, HTCSO; +#endif /* not DISABLE_GOPHER */ +#ifndef 
DISABLE_FINGER +GLOBALREF HTProtocol HTFinger; +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS +GLOBALREF HTProtocol HTWAIS; +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ +#endif /* GLOBALREF_IS_MACRO */ + +static void HTAccessInit(void) /* Call me once */ +{ + HTRegisterProtocol(&HTTP); + HTRegisterProtocol(&HTTPS); + HTRegisterProtocol(&HTFile); + HTRegisterProtocol(&HTTelnet); + HTRegisterProtocol(&HTTn3270); + HTRegisterProtocol(&HTRlogin); +#ifndef DECNET +#ifndef DISABLE_FTP + HTRegisterProtocol(&HTFTP); +#endif /* DISABLE_FTP */ +#ifndef DISABLE_NEWS + HTRegisterProtocol(&HTNews); + HTRegisterProtocol(&HTNNTP); + HTRegisterProtocol(&HTNewsPost); + HTRegisterProtocol(&HTNewsReply); + HTRegisterProtocol(&HTSNews); + HTRegisterProtocol(&HTSNewsPost); + HTRegisterProtocol(&HTSNewsReply); +#endif /* not DISABLE_NEWS */ +#ifndef DISABLE_GOPHER + HTRegisterProtocol(&HTGopher); + HTRegisterProtocol(&HTCSO); +#endif /* not DISABLE_GOPHER */ +#ifndef DISABLE_FINGER + HTRegisterProtocol(&HTFinger); +#endif /* not DISABLE_FINGER */ +#ifdef DIRECT_WAIS + HTRegisterProtocol(&HTWAIS); +#endif /* DIRECT_WAIS */ +#endif /* !DECNET */ + LYRegisterLynxProtocols(); +} +#endif /* !NO_INIT */ + +/* Check for proxy override. override_proxy() + * ------------------------- + * + * Check the no_proxy environment variable to get the list + * of hosts for which proxy server is not consulted. + * + * no_proxy is a comma- or space-separated list of machine + * or domain names, with optional :port part. If no :port + * part is present, it applies to all ports on that domain. + * + * Example: + * no_proxy="cern.ch,some.domain:8001" + * + * Use "*" to override all proxy service: + * no_proxy="*" + */ +BOOL override_proxy(const char *addr) +{ + const char *no_proxy = getenv("no_proxy"); + char *p = NULL; + char *at = NULL; + char *host = NULL; + char *Host = NULL; + char *acc_method = NULL; + int port = 0; + int h_len = 0; + + /* + * Check for global override. 
+ */ + if (no_proxy) { + if (!strcmp(no_proxy, "*")) + return YES; + } + + /* + * Never proxy file:// URLs if they are on the local host. HTLoadFile() + * will not attempt ftp for those if direct access fails. We'll check that + * first, in case no_proxy hasn't been defined. - FM + */ + if (!addr) + return NO; + if (!(host = HTParse(addr, "", PARSE_HOST))) + return NO; + if (!*host) { + FREE(host); + return NO; + } + Host = (((at = strchr(host, '@')) != NULL) ? (at + 1) : host); + + if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) { + if (!strcmp("file", acc_method) && + (LYSameHostname(Host, "localhost") || + LYSameHostname(Host, HTHostName()))) { + FREE(host); + FREE(acc_method); + return YES; + } + FREE(acc_method); + } + + if (!no_proxy) { + FREE(host); + return NO; + } + + if (NULL != (p = HTParsePort(Host, &port))) { /* Port specified */ + *p = 0; /* Chop off port */ + } else { /* Use default port */ + acc_method = HTParse(addr, "", PARSE_ACCESS); + if (acc_method != NULL) { + /* *INDENT-OFF* */ + if (!strcmp(acc_method, "http")) port = 80; + else if (!strcmp(acc_method, "https")) port = 443; + else if (!strcmp(acc_method, "ftp")) port = 21; +#ifndef DISABLE_GOPHER + else if (!strcmp(acc_method, "gopher")) port = 70; +#endif + else if (!strcmp(acc_method, "cso")) port = 105; +#ifndef DISABLE_NEWS + else if (!strcmp(acc_method, "news")) port = 119; + else if (!strcmp(acc_method, "nntp")) port = 119; + else if (!strcmp(acc_method, "newspost")) port = 119; + else if (!strcmp(acc_method, "newsreply")) port = 119; + else if (!strcmp(acc_method, "snews")) port = 563; + else if (!strcmp(acc_method, "snewspost")) port = 563; + else if (!strcmp(acc_method, "snewsreply")) port = 563; +#endif + else if (!strcmp(acc_method, "wais")) port = 210; +#ifndef DISABLE_FINGER + else if (!strcmp(acc_method, "finger")) port = 79; +#endif + else if (!strcmp(acc_method, "telnet")) port = 23; + else if (!strcmp(acc_method, "tn3270")) port = 23; + else if (!strcmp(acc_method, 
"rlogin")) port = 513; + /* *INDENT-ON* */ + + FREE(acc_method); + } + } + if (!port) + port = 80; /* Default */ + h_len = (int) strlen(Host); + + while (*no_proxy) { + const char *end; + const char *colon = NULL; + int templ_port = 0; + int t_len; + int brackets = 0; + + while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ',')) + no_proxy++; /* Skip whitespace and separators */ + + end = no_proxy; + while (*end && !WHITE(*end) && *end != ',') { /* Find separator */ + if (!brackets && (*end == ':')) + colon = end; /* Port number given */ + else if (*end == '[') + ++brackets; + else if (*end == ']') + --brackets; + end++; + } + + if (colon) { + /* unlike HTParsePort(), this may be followed by non-digits */ + templ_port = atoi(colon + 1); + t_len = (int) (colon - no_proxy); + } else { + t_len = (int) (end - no_proxy); + } + + if ((!templ_port || templ_port == port) && + (t_len > 0 && t_len <= h_len && + !strncasecomp(Host + h_len - t_len, no_proxy, t_len))) { + FREE(host); + return YES; + } +#ifdef CJK_EX /* ASATAKU PROXY HACK */ + if ((!templ_port || templ_port == port) && + (t_len > 0 && t_len <= h_len && + isdigit(UCH(*no_proxy)) && + !StrNCmp(host, no_proxy, t_len))) { + FREE(host); + return YES; + } +#endif /* ASATAKU PROXY HACK */ + + if (*end) + no_proxy = (end + 1); + else + break; + } + + FREE(host); + return NO; +} + +/* Find physical name and access protocol get_physical() + * -------------------------------------- + * + * On entry, + * addr must point to the fully qualified hypertext reference. + * anchor a parent anchor with whose address is addr + * + * On exit, + * returns HT_NO_ACCESS Error has occurred. + * HT_OK Success + */ +static int get_physical(const char *addr, + HTParentAnchor *anchor) +{ + int result; + char *acc_method = NULL; /* Name of access method */ + char *physical = NULL; + char *Server_addr = NULL; + BOOL override_flag = NO; + + CTRACE((tfp, "get_physical %s\n", addr)); + + /* + * Make sure the using_proxy variable is FALSE. 
+ */ + using_proxy = NO; + +#ifndef NO_RULES + if ((physical = HTTranslate(addr)) == 0) { + if (redirecting_url) { + return HT_REDIRECTING; + } + return HT_FORBIDDEN; + } + if (anchor->isISMAPScript == TRUE) { + StrAllocCat(physical, "?0,0"); + CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); + } + if (!StrNCmp(physical, "Proxied=", 8)) { + HTAnchor_setPhysical(anchor, physical + 8); + using_proxy = YES; + } else if (!StrNCmp(physical, "NoProxy=", 8)) { + HTAnchor_setPhysical(anchor, physical + 8); + override_flag = YES; + } else { + HTAnchor_setPhysical(anchor, physical); + } + FREE(physical); /* free our copy */ +#else + if (anchor->isISMAPScript == TRUE) { + StrAllocCopy(physical, addr); + StrAllocCat(physical, "?0,0"); + CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n")); + HTAnchor_setPhysical(anchor, physical); + FREE(physical); /* free our copy */ + } else { + HTAnchor_setPhysical(anchor, addr); + } +#endif /* NO_RULES */ + + acc_method = HTParse(HTAnchor_physical(anchor), STR_FILE_URL, PARSE_ACCESS); + + /* + * Check whether gateway access has been set up for this. + * + * This function can be replaced by the rule system above. + * + * If the rule system has already determined that we should use a proxy, or + * that we shouldn't, ignore proxy-related settings, don't use no_proxy + * either. + */ +#define USE_GATEWAYS +#ifdef USE_GATEWAYS + + if (!override_flag && !using_proxy) { /* else ignore no_proxy env var */ + char *host = NULL; + int port; + + if (!strcasecomp(acc_method, "news")) { + /* + * News is different, so we need to check the name of the server, + * as well as the default port for selective exclusions. 
+ */ + if ((host = HTParse(addr, "", PARSE_HOST))) { + if (HTParsePort(host, &port) == NULL) { + StrAllocCopy(Server_addr, "news://"); + StrAllocCat(Server_addr, host); + StrAllocCat(Server_addr, ":119/"); + } + FREE(host); + } else if (LYGetEnv("NNTPSERVER") != NULL) { + StrAllocCopy(Server_addr, "news://"); + StrAllocCat(Server_addr, LYGetEnv("NNTPSERVER")); + StrAllocCat(Server_addr, ":119/"); + } + } else if (!strcasecomp(acc_method, "wais")) { + /* + * Wais also needs checking of the default port for selective + * exclusions. + */ + if ((host = HTParse(addr, "", PARSE_HOST))) { + if (!(HTParsePort(host, &port))) { + StrAllocCopy(Server_addr, "wais://"); + StrAllocCat(Server_addr, host); + StrAllocCat(Server_addr, ":210/"); + } + FREE(host); + } else + StrAllocCopy(Server_addr, addr); + } else { + StrAllocCopy(Server_addr, addr); + } + override_flag = override_proxy(Server_addr); + } + + if (!override_flag && !using_proxy) { + char *gateway_parameter = NULL, *gateway, *proxy; + + /* + * Search for gateways. + */ + HTSprintf0(&gateway_parameter, "WWW_%s_GATEWAY", acc_method); + gateway = LYGetEnv(gateway_parameter); /* coerce for decstation */ + + /* + * Search for proxy servers. + */ + if (!strcmp(acc_method, "file")) + /* + * If we got to here, a file URL is for ftp on a remote host. - FM + */ + strcpy(gateway_parameter, "ftp_proxy"); + else + sprintf(gateway_parameter, "%s_proxy", acc_method); + proxy = LYGetEnv(gateway_parameter); + FREE(gateway_parameter); + + if (gateway) + CTRACE((tfp, "Gateway found: %s\n", gateway)); + if (proxy) + CTRACE((tfp, "proxy server found: %s\n", proxy)); + + /* + * Proxy servers have precedence over gateway servers. 
+ */ + if (proxy) { + char *gatewayed = NULL; + + StrAllocCopy(gatewayed, proxy); + if (!StrNCmp(gatewayed, "http", 4)) { + char *cp = strrchr(gatewayed, '/'); + + /* Append a slash to the proxy specification if it doesn't + * end in one but otherwise looks normal (starts with "http", + * has no '/' other than ones before the hostname). - kw */ + if (cp && (cp - gatewayed) <= 7) + LYAddHtmlSep(&gatewayed); + } + /* + * Ensure that the proxy server uses ftp for file URLs. - FM + */ + if (!StrNCmp(addr, "file", 4)) { + StrAllocCat(gatewayed, "ftp"); + StrAllocCat(gatewayed, (addr + 4)); + } else + StrAllocCat(gatewayed, addr); + using_proxy = YES; + if (anchor->isISMAPScript == TRUE) + StrAllocCat(gatewayed, "?0,0"); + HTAnchor_setPhysical(anchor, gatewayed); + FREE(gatewayed); + FREE(acc_method); + + acc_method = HTParse(HTAnchor_physical(anchor), + STR_HTTP_URL, PARSE_ACCESS); + + } else if (gateway) { + char *path = HTParse(addr, "", + PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION); + + /* Chop leading / off to make host into part of path */ + char *gatewayed = HTParse(path + 1, gateway, PARSE_ALL); + + FREE(path); + HTAnchor_setPhysical(anchor, gatewayed); + FREE(gatewayed); + FREE(acc_method); + + acc_method = HTParse(HTAnchor_physical(anchor), + STR_HTTP_URL, PARSE_ACCESS); + } + } + FREE(Server_addr); +#endif /* use gateways */ + + /* + * Search registered protocols to find suitable one. 
+ */ + result = HT_NO_ACCESS; + { + int i, n; + +#ifndef NO_INIT + if (!protocols) + HTAccessInit(); +#endif + n = HTList_count(protocols); + for (i = 0; i < n; i++) { + HTProtocol *p = (HTProtocol *) HTList_objectAt(protocols, i); + + if (!strcmp(p->name, acc_method)) { + HTAnchor_setProtocol(anchor, p); + FREE(acc_method); + result = HT_OK; + break; + } + } + } + + FREE(acc_method); + return result; +} + +/* + * Temporarily set the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec + * used for charset "assuming" to the values implied by a HTParentAnchor's + * UCStages, after saving the current values for later restoration. - kw @@@ + * These functions may not really belong here, but where else? I want the + * "pop" to occur as soon as possible after loading has finished. - kw @@@ + */ +void LYUCPushAssumed(HTParentAnchor *anchor) +{ + int anchor_LYhndl = -1; + LYUCcharset *anchor_UCI = NULL; + + if (anchor) { + anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); + if (anchor_LYhndl >= 0) + anchor_UCI = HTAnchor_getUCInfoStage(anchor, + UCT_STAGE_PARSER); + if (anchor_UCI && anchor_UCI->MIMEname) { + pushed_assume_MIMEname = UCAssume_MIMEcharset; + UCAssume_MIMEcharset = NULL; + if (HTCJK == JAPANESE) + StrAllocCopy(UCAssume_MIMEcharset, pushed_assume_MIMEname); + else + StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname); + pushed_assume_LYhndl = anchor_LYhndl; + /* some diagnostics */ + if (UCLYhndl_for_unspec != anchor_LYhndl) + CTRACE((tfp, + "LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n", + UCLYhndl_for_unspec, + anchor_LYhndl)); + UCLYhndl_for_unspec = anchor_LYhndl; + return; + } + } + pushed_assume_LYhndl = -1; + FREE(pushed_assume_MIMEname); +} + +/* + * Restore the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec used for + * charset "assuming" from the values saved by LYUCPushAssumed, if any. 
- kw + */ +int LYUCPopAssumed(void) +{ + if (pushed_assume_LYhndl >= 0) { + /* some diagnostics */ + if (UCLYhndl_for_unspec != pushed_assume_LYhndl) + CTRACE((tfp, + "LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n", + UCLYhndl_for_unspec, + pushed_assume_LYhndl)); + UCLYhndl_for_unspec = pushed_assume_LYhndl; + pushed_assume_LYhndl = -1; + FREE(UCAssume_MIMEcharset); + UCAssume_MIMEcharset = pushed_assume_MIMEname; + pushed_assume_MIMEname = NULL; + return UCLYhndl_for_unspec; + } + return -1; +} + +/* Load a document HTLoad() + * --------------- + * + * This is an internal routine, which has an address AND a matching + * anchor. (The public routines are called with one OR the other.) + * + * On entry, + * addr must point to the fully qualified hypertext reference. + * anchor a parent anchor with whose address is addr + * + * On exit, + * returns <0 Error has occurred. + * HT_LOADED Success + * HT_NO_DATA Success, but no document loaded. + * (telnet session started etc) + */ +static int HTLoad(const char *addr, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + HTProtocol *p; + int status = get_physical(addr, anchor); + + if (reloading) { + FREE(anchor->charset); + FREE(anchor->UCStages); + } + + if (status == HT_FORBIDDEN) { + /* prevent crash if telnet or similar was forbidden by rule. - kw */ + LYFixCursesOn("show alert:"); + status = HTLoadError(sink, 500, gettext("Access forbidden by rule")); + } else if (status == HT_REDIRECTING) { + ; /* fake redirection by rule, to redirecting_url */ + } else if (status >= 0) { + /* prevent crash if telnet or similar mapped or proxied by rule. 
- kw */ + LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor)); + p = (HTProtocol *) HTAnchor_protocol(anchor); + anchor->parent->underway = TRUE; /* Hack to deal with caching */ + status = p->load(HTAnchor_physical(anchor), + anchor, format_out, sink); + anchor->parent->underway = FALSE; + LYUCPopAssumed(); + } + return status; +} + +/* Get a save stream for a document HTSaveStream() + * -------------------------------- + */ +HTStream *HTSaveStream(HTParentAnchor *anchor) +{ + HTProtocol *p = (HTProtocol *) HTAnchor_protocol(anchor); + + if (!p) + return NULL; + + return p->saveStream(anchor); +} + +int redirection_attempts = 0; /* counter in HTLoadDocument */ + +/* Load a document - with logging etc HTLoadDocument() + * ---------------------------------- + * + * - Checks or documents already loaded + * - Logs the access + * - Allows stdin filter option + * - Trace output and error messages + * + * On Entry, + * anchor is the node_anchor for the document + * full_address The address of the document to be accessed. + * filter if YES, treat stdin as HTML + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +static BOOL HTLoadDocument(const char *full_address, /* may include #fragment */ + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + int status; + HText *text; + const char *address_to_load = full_address; + char *cp; + BOOL ForcingNoCache = LYforce_no_cache; + + CTRACE((tfp, "HTAccess: loading document %s\n", address_to_load)); + + /* + * Free use_this_url_instead and reset permanent_redirection if not done + * elsewhere. - FM + */ + FREE(use_this_url_instead); + permanent_redirection = FALSE; + + /* + * Make sure some yoyo doesn't send us 'round in circles with redirecting + * URLs that point back to themselves. 
We'll set the original Lynx limit + * of 10 redirections per requested URL from a user, because the HTTP/1.1 + * will no longer specify a restriction to 5, but will leave it up to the + * browser's discretion, in deference to Microsoft. - FM + */ + if (redirection_attempts > 10) { + redirection_attempts = 0; + HTAlert(TOO_MANY_REDIRECTIONS); + return NO; + } + + /* + * If this is marked as an internal link but we don't have the document + * loaded any more, and we haven't explicitly flagged that we want to + * reload with LYforce_no_cache, then something has disappeared from the + * cache when we expected it to be still there. The user probably doesn't + * expect a new network access. So if we have POST data and safe is not + * set in the anchor, ask for confirmation, and fail if not granted. The + * exception are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap + * for prompting if necessary. - kw + */ + text = (HText *) HTAnchor_document(anchor); + if (LYinternal_flag && !text && !LYforce_no_cache && + anchor->post_data && !anchor->safe && + !isLYNXIMGMAP(full_address) && + HTConfirm(gettext("Document with POST content not found in cache. Resubmit?")) + != TRUE) { + return NO; + } + + /* + * If we don't have POST content, check whether this is a previous + * redirecting URL, and keep re-checking until we get to the final + * destination or redirection limit. If we do have POST content, we didn't + * allow permanent redirection, and an interactive user will be deciding + * whether to keep redirecting. - FM + */ + if (!anchor->post_data) { + while ((cp = HTAnchor_physical(anchor)) != NULL && + !StrNCmp(cp, "Location=", 9)) { + DocAddress NewDoc; + + CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n", + anchor->address)); + CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp + 9)); + + /* + * Don't exceed the redirection_attempts limit. 
- FM + */ + if (++redirection_attempts > 10) { + HTAlert(TOO_MANY_REDIRECTIONS); + redirection_attempts = 0; + FREE(use_this_url_instead); + return NO; + } + + /* + * Set up the redirection. - FM + */ + StrAllocCopy(use_this_url_instead, cp + 9); + NewDoc.address = use_this_url_instead; + NewDoc.post_data = NULL; + NewDoc.post_content_type = NULL; + NewDoc.bookmark = anchor->bookmark; + NewDoc.isHEAD = anchor->isHEAD; + NewDoc.safe = anchor->safe; + anchor = HTAnchor_findAddress(&NewDoc); + } + } + /* + * If we had previous redirection, go back and check out that the URL under + * the current restrictions. - FM + */ + if (use_this_url_instead) { + FREE(redirecting_url); + return (NO); + } + + /* + * See if we can use an already loaded document. + */ + text = (HText *) HTAnchor_document(anchor); + if (text && !LYforce_no_cache) { + /* + * We have a cached rendition of the target document. Check if it's OK + * to re-use it. We consider it OK if: + * (1) the anchor does not have the no_cache element set, or + * (2) we've overridden it, e.g., because we are acting on a PREV_DOC + * command or a link in the History Page and it's not a reply from a + * POST with the LYresubmit_posts flag set, or + * (3) we are repositioning within the currently loaded document based + * on the target anchor's address (URL_Reference). + * + * If DONT_TRACK_INTERNAL_LINKS is defined, HText_AreDifferent() is + * used to determine whether (3) applies. If the target address + * differs from that of the current document only by a fragment and the + * target address has an appended fragment, repositioning without + * reloading is always assumed. Note that HText_AreDifferent() + * currently always returns TRUE if the target has a LYNXIMGMAP URL, so + * that an internally generated pseudo-document will normally not be + * re-used unless condition (2) applies. (Condition (1) cannot apply + * since in LYMap.c, no_cache is always set in the anchor object). 
+ * This doesn't guarantee that the resource from which the MAP element + * is taken will be read again (reloaded) when the list of links for a + * client-side image map is regenerated, when in some cases it should + * (e.g., user requested RELOAD, or HTTP response with no-cache header + * and we are not overriding). + * + * If DONT_TRACK_INTERNAL_LINKS is undefined, a target address that + * points to the same URL as the current document may still result in + * reloading, depending on whether the original URL-Reference was given + * as an internal link in the context of the previously loaded + * document. HText_AreDifferent() is not used here for testing whether + * we are just repositioning. For an internal link, the potential + * callers of this function from mainloop() down will either avoid + * making the call (and do the repositioning differently) or set + * LYinternal_flag (or LYoverride_no_cache). Note that (a) LYNXIMGMAP + * pseudo-documents and (b) The "List Page" document are treated + * logically as being part of the document on which they are based, for + * the purpose of whether to treat a link as internal, but the logic + * for this (by setting LYinternal_flag as necessary) is implemented + * elsewhere. There is a specific test for LYNXIMGMAP here so that the + * generated pseudo-document will not be re-used unless + * LYoverride_no_cache is set. The same caveat as above applies w.r.t. + * reloading of the underlying resource. + * + * We also should be checking other aspects of cache regulation (e.g., + * based on an If-Modified-Since check, etc.) but the code for doing + * those other things isn't available yet. 
+ */ + if (!reloading && + (LYoverride_no_cache || +#ifdef DONT_TRACK_INTERNAL_LINKS + !HText_hasNoCacheSet(text) || + !HText_AreDifferent(anchor, full_address) +#else + ((LYinternal_flag || !HText_hasNoCacheSet(text)) && + !isLYNXIMGMAP(full_address)) +#endif /* TRACK_INTERNAL_LINKS */ + )) { + CTRACE((tfp, "HTAccess: Document already in memory.\n")); + HText_select(text); + +#ifdef DIRED_SUPPORT + if (HTAnchor_format(anchor) == WWW_DIRED) + lynx_edit_mode = TRUE; +#endif + redirection_attempts = 0; + return YES; + } else { + ForcingNoCache = YES; + BStrFree(anchor->post_data); + CTRACE((tfp, "HTAccess: Auto-reloading document.\n")); + } + } + + if (HText_HaveUserChangedForms(text)) { + /* + * Issue a warning. User forms content will be lost. + * Will not restore changed forms, currently. + */ + HTAlert(RELOADING_FORM); + } + + /* + * Get the document from the net. If we are auto-reloading, the mutable + * anchor elements from the previous rendition should be freed in + * conjunction with loading of the new rendition. - FM + */ + LYforce_no_cache = NO; /* reset after each time through */ + if (ForcingNoCache) { + FREE(anchor->title); /* ??? */ + } + status = HTLoad(address_to_load, anchor, format_out, sink); + CTRACE((tfp, "HTAccess: status=%d\n", status)); + + /* + * RECOVERY: if the loading failed, and we had a cached HText copy, and no + * new HText created - use a previous copy, issue a warning. + */ + if (text && status < 0 && (HText *) HTAnchor_document(anchor) == text) { + HTAlert(gettext("Loading failed, use a previous copy.")); + CTRACE((tfp, "HTAccess: Loading failed, use a previous copy.\n")); + HText_select(text); + +#ifdef DIRED_SUPPORT + if (HTAnchor_format(anchor) == WWW_DIRED) + lynx_edit_mode = TRUE; +#endif + redirection_attempts = 0; + return YES; + } + + /* + * Log the access if necessary. + */ + if (HTlogfile) { + time_t theTime; + + time(&theTime); + fprintf(HTlogfile, "%24.24s %s %s %s\n", + ctime(&theTime), + HTClientHost ? 
HTClientHost : "local", + status < 0 ? "FAIL" : "GET", + full_address); + fflush(HTlogfile); /* Actually update it on disk */ + CTRACE((tfp, "Log: %24.24s %s %s %s\n", + ctime(&theTime), + HTClientHost ? HTClientHost : "local", + status < 0 ? "FAIL" : "GET", + full_address)); + } + + /* + * Check out what we received from the net. + */ + if (status == HT_REDIRECTING) { + /* Exported from HTMIME.c, of all places. */ + /* NO!! - FM */ + /* + * Doing this via HTMIME.c meant that the redirection cover page was + * already loaded before we learned that we want a different URL. + * Also, changing anchor->address, as Lynx was doing, meant we could + * never again access its hash table entry, creating an insolvable + * memory leak. Instead, if we had a 301 status and set + * permanent_redirection, we'll load the new URL in anchor->physical, + * preceded by a token, which we can check to make replacements on + * subsequent access attempts. We'll check recursively, and retrieve + * the final URL if we had multiple redirections to it. If we just + * went to HTLoad now, as Lou originally had this, we couldn't do + * Lynx's security checks and alternate handling of some URL types. + * So, instead, we'll go all the way back to the top of getfile in + * LYGetFile.c when the status is HT_REDIRECTING. This may seem + * bizarre, but it works like a charm! - FM + * + * Actually, the location header for redirections is now again picked + * up in HTMIME.c. But that's an internal matter between HTTP.c and + * HTMIME.c, is still under control of HTLoadHTTP for http URLs, is + * done in a way that doesn't load the redirection response's body + * (except when wanted as an error fallback), and thus need not concern + * us here. - kw 1999-12-02 + */ + CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n", + address_to_load)); + CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", + redirecting_url)); + /* + * Prevent circular references. 
+ */ + if (strcmp(address_to_load, redirecting_url)) { /* if different */ + /* + * Load token and redirecting url into anchor->physical if we had + * 301 Permanent redirection. HTTP.c does not allow this if we + * have POST content. - FM + */ + if (permanent_redirection) { + StrAllocCopy(anchor->physical, "Location="); + StrAllocCat(anchor->physical, redirecting_url); + } + + /* + * Set up flags before return to getfile. - FM + */ + StrAllocCopy(use_this_url_instead, redirecting_url); + if (ForcingNoCache) + LYforce_no_cache = YES; + ++redirection_attempts; + FREE(redirecting_url); + permanent_redirection = FALSE; + return (NO); + } + ++redirection_attempts; + FREE(redirecting_url); + permanent_redirection = FALSE; + return (YES); + } + + /* + * We did not receive a redirecting URL. - FM + */ + redirection_attempts = 0; + FREE(redirecting_url); + permanent_redirection = FALSE; + + if (status == HT_LOADED) { + CTRACE((tfp, "HTAccess: `%s' has been accessed.\n", + full_address)); + return YES; + } + if (status == HT_PARTIAL_CONTENT) { + HTAlert(gettext("Loading incomplete.")); + CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n", + full_address)); + return YES; + } + + if (status == HT_NO_DATA) { + CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n", + full_address)); + return NO; + } + + if (status == HT_NOT_LOADED) { + CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n", + full_address)); + return NO; + } + + if (status == HT_INTERRUPTED) { + CTRACE((tfp, + "HTAccess: `%s' has been accessed, transfer interrupted.\n", + full_address)); + return NO; + } + + if (status > 0) { + /* + * If you get this, then please find which routine is returning a + * positive unrecognized error code! + */ + fprintf(stderr, + gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n")); + fprintf(stderr, + gettext("**** HTAccess: Internal software error. 
Please mail lynx-dev@nongnu.org!\n")); + fprintf(stderr, gettext("**** HTAccess: Status returned was: %d\n"), status); + exit_immediately(EXIT_FAILURE); + } + + /* Failure in accessing a document */ + cp = NULL; + StrAllocCopy(cp, gettext("Can't Access")); + StrAllocCat(cp, " `"); + StrAllocCat(cp, full_address); + StrAllocCat(cp, "'"); + _HTProgress(cp); + FREE(cp); + + CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address)); + HTLoadError(sink, 500, gettext("Unable to access document.")); + return NO; +} /* HTLoadDocument */ + +/* Load a document from absolute name. HTLoadAbsolute() + * ----------------------------------- + * + * On Entry, + * addr The absolute address of the document to be accessed. + * filter if YES, treat document as HTML + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadAbsolute(const DocAddress *docaddr) +{ + return HTLoadDocument(docaddr->address, + HTAnchor_findAddress(docaddr), + (HTOutputFormat ? HTOutputFormat : WWW_PRESENT), + HTOutputStream); +} + +#ifdef NOT_USED_CODE +/* Load a document from absolute name to stream. HTLoadToStream() + * --------------------------------------------- + * + * On Entry, + * addr The absolute address of the document to be accessed. + * sink if non-NULL, send data down this stream + * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadToStream(const char *addr, + BOOL filter, + HTStream *sink) +{ + return HTLoadDocument(addr, + HTAnchor_findSimpleAddress(addr), + (HTOutputFormat ? HTOutputFormat : WWW_PRESENT), + sink); +} +#endif /* NOT_USED_CODE */ + +/* Load a document from relative name. HTLoadRelative() + * ----------------------------------- + * + * On Entry, + * relative_name The relative address of the document + * to be accessed. 
+ * + * On Exit, + * returns YES Success in opening document + * NO Failure + */ +BOOL HTLoadRelative(const char *relative_name, + HTParentAnchor *here) +{ + DocAddress full_address; + BOOL result; + char *mycopy = NULL; + char *stripped = NULL; + + full_address.address = NULL; + full_address.post_data = NULL; + full_address.post_content_type = NULL; + full_address.bookmark = NULL; + full_address.isHEAD = FALSE; + full_address.safe = FALSE; + + StrAllocCopy(mycopy, relative_name); + + stripped = HTStrip(mycopy); + full_address.address = + HTParse(stripped, + here->address, + PARSE_ALL_WITHOUT_ANCHOR); + result = HTLoadAbsolute(&full_address); + /* + * If we got redirection, result will be NO, but use_this_url_instead will + * be set. The calling routine should check both and do whatever is + * appropriate. - FM + */ + FREE(full_address.address); + FREE(mycopy); /* Memory leak fixed 10/7/92 -- JFG */ + return result; +} + +/* Load if necessary, and select an anchor. HTLoadAnchor() + * ---------------------------------------- + * + * On Entry, + * destination The child or parent anchor to be loaded. + * + * On Exit, + * returns YES Success + * NO Failure + */ +BOOL HTLoadAnchor(HTAnchor * destination) +{ + HTParentAnchor *parent; + BOOL loaded = NO; + + if (!destination) + return NO; /* No link */ + + parent = HTAnchor_parent(destination); + + if (HTAnchor_document(parent) == NULL) { /* If not already loaded */ + /* TBL 921202 */ + BOOL result; + + result = HTLoadDocument(parent->address, + parent, + HTOutputFormat ? + HTOutputFormat : WWW_PRESENT, + HTOutputStream); + if (!result) + return NO; + loaded = YES; + } { + HText *text = (HText *) HTAnchor_document(parent); + + if ((destination != (HTAnchor *) parent) && + (destination != (HTAnchor *) (parent->parent))) { + /* If child anchor */ + HText_selectAnchor(text, /* Double display? 
@@ */ + (HTChildAnchor *) destination); + } else { + if (!loaded) + HText_select(text); + } + } + return YES; + +} /* HTLoadAnchor */ + +/* Search. HTSearch() + * ------- + * + * Performs a keyword search on word given by the user. Adds the + * keyword to the end of the current address and attempts to open + * the new address. + * + * On Entry, + * *keywords space-separated keyword list or similar search list + * here is anchor search is to be done on. + */ +static char hex(int i) +{ + const char *hexchars = "0123456789ABCDEF"; + + return hexchars[i]; +} + +BOOL HTSearch(const char *keywords, + HTParentAnchor *here) +{ +#define acceptable \ +"1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_" + + char *q, *u; + const char *p, *s, *e; /* Pointers into keywords */ + char *address = NULL; + BOOL result; + char *escaped = typecallocn(char, (strlen(keywords) * 3) + 1); + static const BOOL isAcceptable[96] = + /* *INDENT-OFF* */ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */ + 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */ + 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */ + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */ + 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */ + /* *INDENT-ON* */ + + if (escaped == NULL) + outofmem(__FILE__, "HTSearch"); + + assert(escaped != NULL); + + StrAllocCopy(address, here->isIndexAction); + + /* + * Convert spaces to + and hex escape unacceptable characters. 
+ */ + for (s = keywords; *s && WHITE(*s); s++) /* Scan */ + ; /* Skip white space */ + for (e = s + strlen(s); e > s && WHITE(*(e - 1)); e--) /* Scan */ + ; /* Skip trailers */ + for (q = escaped, p = s; p < e; p++) { /* Scan stripped field */ + unsigned char c = UCH(TOASCII(*p)); + + if (WHITE(*p)) { + *q++ = '+'; + } else if (IS_CJK_TTY) { + *q++ = *p; + } else if (c >= 32 && c <= UCH(127) && isAcceptable[c - 32]) { + *q++ = *p; /* 930706 TBL for MVS bug */ + } else { + *q++ = '%'; + *q++ = hex((int) (c >> 4)); + *q++ = hex((int) (c & 15)); + } + } /* Loop over string */ + *q = '\0'; /* Terminate escaped string */ + u = strchr(address, '?'); /* Find old search string */ + if (u != NULL) + *u = '\0'; /* Chop old search off */ + + StrAllocCat(address, "?"); + StrAllocCat(address, escaped); + FREE(escaped); + result = HTLoadRelative(address, here); + FREE(address); + + /* + * If we got redirection, result will be NO, but use_this_url_instead will + * be set. The calling routine should check both and do whatever is + * appropriate. Only an http server (not a gopher or wais server) could + * return redirection. Lynx will go all the way back to its mainloop() and + * subject a redirecting URL to all of its security and restrictions + * checks. - FM + */ + return result; +} + +/* Search Given Indexname. HTSearchAbsolute() + * ----------------------- + * + * Performs a keyword search on word given by the user. Adds the + * keyword to the end of the current address and attempts to open + * the new address. + * + * On Entry, + * *keywords space-separated keyword list or similar search list + * *indexname is name of object search is to be done on. 
+ */ +BOOL HTSearchAbsolute(const char *keywords, + char *indexname) +{ + DocAddress abs_doc; + HTParentAnchor *anchor; + + abs_doc.address = indexname; + abs_doc.post_data = NULL; + abs_doc.post_content_type = NULL; + abs_doc.bookmark = NULL; + abs_doc.isHEAD = FALSE; + abs_doc.safe = FALSE; + + anchor = HTAnchor_findAddress(&abs_doc); + return HTSearch(keywords, anchor); +} + +#ifdef NOT_USED_CODE +/* Generate the anchor for the home page. HTHomeAnchor() + * -------------------------------------- + * + * As it involves file access, this should only be done once + * when the program first runs. + * This is a default algorithm -- browser don't HAVE to use this. + * But consistency between browsers is STRONGLY recommended! + * + * Priority order is: + * 1 WWW_HOME environment variable (logical name, etc) + * 2 ~/WWW/default.html + * 3 /usr/local/bin/default.html + * 4 http://www.w3.org/default.html + */ +HTParentAnchor *HTHomeAnchor(void) +{ + char *my_home_document = NULL; + char *home = LYGetEnv(LOGICAL_DEFAULT); + char *ref; + HTParentAnchor *anchor; + + if (home) { + StrAllocCopy(my_home_document, home); +#define MAX_FILE_NAME 1024 /* @@@ */ + } else if (HTClientHost) { /* Telnet server */ + /* + * Someone telnets in, they get a special home. 
+ */ + FILE *fp = fopen(REMOTE_POINTER, "r"); + char *status; + + if (fp) { + my_home_document = typecallocn(char, MAX_FILE_NAME); + + if (my_home_document == NULL) + outofmem(__FILE__, "HTHomeAnchor"); + status = fgets(my_home_document, MAX_FILE_NAME, fp); + if (!status) { + FREE(my_home_document); + } + fclose(fp); + } + if (my_home_document == NULL) + StrAllocCopy(my_home_document, REMOTE_ADDRESS); + } +#ifdef UNIX + if (my_home_document == NULL) { + FILE *fp = NULL; + char *home = LYGetEnv("HOME"); + + if (home != 0) { + HTSprintf0(&my_home_document, "%s/%s", home, PERSONAL_DEFAULT); + fp = fopen(my_home_document, "r"); + } + + if (!fp) { + StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE); + fp = fopen(my_home_document, "r"); + } + if (fp) { + fclose(fp); + } else { + CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n", + PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE)); + FREE(my_home_document); + } + } +#endif /* UNIX */ + ref = HTParse((my_home_document ? + my_home_document : (HTClientHost ? + REMOTE_ADDRESS : LAST_RESORT)), + STR_FILE_URL, + PARSE_ALL_WITHOUT_ANCHOR); + if (my_home_document) { + CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n", + my_home_document, ref)); + FREE(my_home_document); + } + anchor = HTAnchor_findSimpleAddress(ref); + FREE(ref); + return anchor; +} +#endif /* NOT_USED_CODE */ diff --git a/WWW/Library/Implementation/HTAccess.h b/WWW/Library/Implementation/HTAccess.h new file mode 100644 index 00000000..3aa9c6d9 --- /dev/null +++ b/WWW/Library/Implementation/HTAccess.h @@ -0,0 +1,267 @@ +/* + * $LynxId: HTAccess.h,v 1.20 2008/01/03 00:24:16 tom Exp $ + * HTAccess: Access manager for libwww + * ACCESS MANAGER + * + * This module keeps a list of valid protocol (naming scheme) specifiers with + * associated access code. It allows documents to be loaded given various + * combinations of parameters. New access protocols may be registered at any + * time. + * + * Part of the libwww library . 
 *
 */
#ifndef HTACCESS_H
#define HTACCESS_H

/*      Definition uses:
*/
#include <HTAnchor.h>
#include <HTFormat.h>

#ifdef __cplusplus
extern "C" {
#endif
    extern char *use_this_url_instead;

    extern int redirection_attempts;

/*      Return codes from load routines:
 *
 *      These codes may be returned by the protocol modules,
 *      and by the HTLoad routines.
 *      In general, positive codes are OK and negative ones are bad.
 */

/*

Default Addresses

   These control the home page selection.  To mess with these for normal browses is asking
   for user confusion.

   Every default except LOGICAL_DEFAULT is wrapped in #ifndef, so a value
   defined before this header is included takes precedence.

 */
#define LOGICAL_DEFAULT "WWW_HOME"      /* Name of the environment variable (logical name) giving the home page */

#ifndef PERSONAL_DEFAULT
#define PERSONAL_DEFAULT        "WWW/default.html"      /* in home directory */
#endif
#ifndef LOCAL_DEFAULT_FILE
#define LOCAL_DEFAULT_FILE "/usr/local/lib/WWW/default.html"
#endif
/*  If one telnets to a www access point,
    it will look in this file for home page */
#ifndef REMOTE_POINTER
#define REMOTE_POINTER  "/etc/www-remote.url"   /* can't be file */
#endif
/* and if that fails it will use this. */
#ifndef REMOTE_ADDRESS
#define REMOTE_ADDRESS  "http://www.w3.org/remote.html"         /* can't be file */
#endif

/* If run from telnet daemon and no -l specified, use this file:
*/
#ifndef DEFAULT_LOGFILE
#define DEFAULT_LOGFILE "/usr/adm/www-log/www-log"
#endif

/*      If the home page isn't found, use this file:
*/
#ifndef LAST_RESORT
#define LAST_RESORT     "http://www.w3.org/default.html"
#endif

/*

Flags which may be set to control this module

 */
#ifdef NOT
    extern int HTDiag;          /* Flag: load source as plain text */
#endif                          /* NOT */
    extern char *HTClientHost;  /* Name or number of telnetting host */
    extern FILE *HTlogfile;     /* File to output one-liners to */
    extern BOOL HTSecure;       /* Disable security holes?
*/
    extern BOOL HTPermitRedir;  /* Special flag for getfile() */
    extern HTStream *HTOutputStream;    /* For non-interactive, set this */
    extern HTFormat HTOutputFormat;     /* To convert on load, set this */

/*	Check for proxy override.			override_proxy()
 *
 *	Check the no_proxy environment variable to get the list
 *	of hosts for which proxy server is not consulted.
 *
 *	no_proxy is a comma- or space-separated list of machine
 *	or domain names, with optional :port part.  If no :port
 *	part is present, it applies to all ports on that domain.
 *
 *  Example:
 *          no_proxy="cern.ch,some.domain:8001"
 *
 *  Use "*" to override all proxy service:
 *	     no_proxy="*"
 */
    extern BOOL override_proxy(const char *addr);

/*

Load a document from relative name

  ON ENTRY,
  relative_name           The relative address of the file to be accessed.
  here                    The anchor of the object being searched

  ON EXIT,
  returns YES             Success in opening file
  NO                      Failure

 */
    extern BOOL HTLoadRelative(const char *relative_name,
                               HTParentAnchor *here);

/*

Load a document from absolute name

  ON ENTRY,
  addr                    DocAddress holding the absolute address of the
                          document to be accessed.  (This function takes no
                          filter_it argument; compare HTLoadToStream.)

  ON EXIT,
  returns YES             Success in opening document
  NO                      Failure

 */
    extern BOOL HTLoadAbsolute(const DocAddress *addr);

/*

Load a document from absolute name to a stream

  ON ENTRY,
  addr                    The absolute address of the document to be accessed.
  filter_it               if YES, treat document as HTML
  sink                    The stream the document is loaded to.

  ON EXIT,
  returns YES             Success in opening document
  NO                      Failure

   Note: This is equivalent to HTLoadDocument

 */
    extern BOOL HTLoadToStream(const char *addr, BOOL filter_it,
                               HTStream *sink);

/*

Load if necessary, and select an anchor

  ON ENTRY,
  destination             The child or parent anchor to be loaded.

  ON EXIT,
  returns YES             Success
  returns NO              Failure

 */
    extern BOOL HTLoadAnchor(HTAnchor * destination);

/*

Make a stream for Saving object back

  ON ENTRY,
  anchor                  is valid anchor which has previously been loaded

  ON EXIT,
  returns                 0 if error else a stream to save the object to.

 */
    extern HTStream *HTSaveStream(HTParentAnchor *anchor);

/*

Search

   Performs a search on word given by the user.  Adds the search words to the end of the
   current address and attempts to open the new address.

  ON ENTRY,
  *keywords               space-separated keyword list or similar search list
  here                    The anchor of the object being searched

 */
    extern BOOL HTSearch(const char *keywords, HTParentAnchor *here);

/*

Search Given Indexname

   Performs a keyword search on word given by the user.  Adds the keyword to the end of
   the current address and attempts to open the new address.

  ON ENTRY,
  *keywords               space-separated keyword list or similar search list
  *indexname              is name of object search is to be done on.

 */
    extern BOOL HTSearchAbsolute(const char *keywords,
                                 char *indexname);

/*

Register an access method

   One HTProtocol record describes one access method; it is handed to
   HTRegisterProtocol().

 */

    typedef struct _HTProtocol {
        /* Name of the access method (presumably the URL scheme -- confirm). */
        const char *name;

        /* Loads full_address for anchor, delivering format_out to sink. */
        int (*load) (const char *full_address,
                     HTParentAnchor *anchor,
                     HTFormat format_out,
                     HTStream *sink);

        /* Returns a stream for saving the object of anchor back. */
        HTStream *(*saveStream) (HTParentAnchor *anchor);

    } HTProtocol;

    extern BOOL HTRegisterProtocol(HTProtocol * protocol);

/*

Generate the anchor for the home page

 */

/*

   As it involves file access, this should only be done once when the program first runs.
   This is a default algorithm -- browsers don't HAVE to use this.

 */
    extern HTParentAnchor *HTHomeAnchor(void);

/*

Return Host Name

 */
    extern const char *HTHostName(void);

/*

For registering protocols supported by Lynx

*/
    extern void LYRegisterLynxProtocols(void);

    extern void LYUCPushAssumed(HTParentAnchor *anchor);
    extern int LYUCPopAssumed(void);

    extern BOOL using_proxy;    /* Are we using an NNTP proxy? */

#ifdef __cplusplus
}
#endif
#endif                          /* HTACCESS_H */
diff --git a/WWW/Library/Implementation/HTAnchor.c b/WWW/Library/Implementation/HTAnchor.c
new file mode 100644
index 00000000..a3ebaeb7
--- /dev/null
+++ b/WWW/Library/Implementation/HTAnchor.c
@@ -0,0 +1,1468 @@
/*
 * $LynxId: HTAnchor.c,v 1.71 2012/02/03 01:52:50 tom Exp $
 *
 *	Hypertext "Anchor" Object				HTAnchor.c
 *	==========================
 *
 * An anchor represents a region of a hypertext document which is linked to
 * another anchor in the same or a different document.
 *
 * History
 *
 *	   Nov 1990  Written in Objective-C for the NeXT browser (TBL)
 *	24-Oct-1991 (JFG), written in C, browser-independent
 *	21-Nov-1991 (JFG), first complete version
 *
 *	(c) Copyright CERN 1991 - See Copyright.html
 */

/*
 * Number of buckets in the table of parent anchors.  Memory/speed tradeoff.
 * Note that 1001 = 7 * 11 * 13, so despite the historical description
 * ("arbitrary prime") this value is not a prime.
 */
#define HASH_SIZE 1001

#include <HTUtils.h>
#include <HTAnchor.h>
#include <HTParse.h>
#include <HTString.h>
#include <UCAux.h>
#include <UCMap.h>

#include <GridText.h>
#include <LYUtils.h>
#include <LYCharSets.h>
#include <LYUtils.h>
#include <LYLeaks.h>

/* Integer type of a bucket index; it only has to hold 0 .. HASH_SIZE-1. */
#define HASH_TYPE unsigned short

#ifdef NOT_DEFINED
/*
 *	This is the hashing function used to determine which list in the
 *		adult_table a parent anchor should be put in.  This is a
 *		much simpler function than the original used.
 */
#define HASH_FUNCTION(cp_address) \
	( (HASH_TYPE)strlen(cp_address) *\
	  (HASH_TYPE)TOUPPER(*cp_address) % HASH_SIZE )
#endif /* NOT_DEFINED */

/*
 *	This is the original function.	We'll use it again.
- FM + */ +static HASH_TYPE HASH_FUNCTION(const char *cp_address) +{ + HASH_TYPE hash; + const unsigned char *p; + + for (p = (const unsigned char *) cp_address, hash = 0; *p; p++) + hash = (HASH_TYPE) (hash * 3 + (*(const unsigned char *) p)) % HASH_SIZE; + + return (hash); +} + +typedef struct _HyperDoc Hyperdoc; + +#ifdef VMS +struct _HyperDoc { + int junk; /* VMS cannot handle pointers to undefined structs */ +}; +#endif /* VMS */ + +/* Table of lists of all parents */ +static HTList adult_table[HASH_SIZE] = +{ + {NULL, NULL}}; + +/* Creation Methods + * ================ + * + * Do not use "new" by itself outside this module. In order to enforce + * consistency, we insist that you furnish more information about the + * anchor you are creating : use newWithParent or newWithAddress. + */ +static HTParentAnchor0 *HTParentAnchor0_new(const char *address, + unsigned hash) +{ + HTParentAnchor0 *newAnchor = typecalloc(HTParentAnchor0); + + if (newAnchor == NULL) + outofmem(__FILE__, "HTParentAnchor0_new"); + + assert(newAnchor != NULL); + + newAnchor->parent = newAnchor; /* self */ + StrAllocCopy(newAnchor->address, address); + newAnchor->adult_hash = (HASH_TYPE) hash; + + return (newAnchor); +} + +static HTParentAnchor *HTParentAnchor_new(HTParentAnchor0 *parent) +{ + HTParentAnchor *newAnchor = typecalloc(HTParentAnchor); + + if (newAnchor == NULL) + outofmem(__FILE__, "HTParentAnchor_new"); + + assert(newAnchor != NULL); + + newAnchor->parent = parent; /* cross reference */ + parent->info = newAnchor; /* cross reference */ + newAnchor->address = parent->address; /* copy pointer */ + + newAnchor->isISMAPScript = FALSE; /* Lynx appends ?0,0 if TRUE. - FM */ + newAnchor->isHEAD = FALSE; /* HEAD request if TRUE. - FM */ + newAnchor->safe = FALSE; /* Safe. - FM */ + newAnchor->no_cache = FALSE; /* no-cache? - FM */ + newAnchor->inBASE = FALSE; /* duplicated from HTML.c/h */ + newAnchor->content_length = 0; /* Content-Length. 
- FM */ + return (newAnchor); +} + +static HTChildAnchor *HTChildAnchor_new(HTParentAnchor0 *parent) +{ + HTChildAnchor *p = typecalloc(HTChildAnchor); + + if (p == NULL) + outofmem(__FILE__, "HTChildAnchor_new"); + + assert(p != NULL); + + p->parent = parent; /* parent reference */ + return p; +} + +static HTChildAnchor *HText_pool_ChildAnchor_new(HTParentAnchor *parent) +{ + HTChildAnchor *p = (HTChildAnchor *) HText_pool_calloc((HText *) (parent->document), + (unsigned) sizeof(HTChildAnchor)); + + if (p == NULL) + outofmem(__FILE__, "HText_pool_ChildAnchor_new"); + + assert(p != NULL); + + p->parent = parent->parent; /* parent reference */ + return p; +} + +#ifdef CASE_INSENSITIVE_ANCHORS +/* Case insensitive string comparison */ +#define HT_EQUIV(a,b) (TOUPPER(a) == TOUPPER(b)) +#else +/* Case sensitive string comparison */ +#define HT_EQUIV(a,b) ((a) == (b)) +#endif + +/* Null-terminated string comparison + * --------------------------------- + * On entry, + * s Points to one string, null terminated + * t points to the other. + * On exit, + * returns YES if the strings are equivalent + * NO if they differ. + */ +static BOOL HTSEquivalent(const char *s, + const char *t) +{ + if (s && t) { /* Make sure they point to something */ + for (; *s && *t; s++, t++) { + if (!HT_EQUIV(*s, *t)) { + return (NO); + } + } + return (BOOL) (HT_EQUIV(*s, *t)); + } else { + return (BOOL) (s == t); /* Two NULLs are equivalent, aren't they ? */ + } +} + +/* Binary string comparison + * ------------------------ + * On entry, + * s Points to one bstring + * t points to the other. + * On exit, + * returns YES if the strings are equivalent + * NO if they differ. 
+ */ +static BOOL HTBEquivalent(const bstring *s, + const bstring *t) +{ + if (s && t && BStrLen(s) == BStrLen(t)) { + int j; + int len = BStrLen(s); + + for (j = 0; j < len; ++j) { + if (!HT_EQUIV(BStrData(s)[j], BStrData(t)[j])) { + return (NO); + } + } + return (YES); + } else { + return (BOOL) (s == t); /* Two NULLs are equivalent, aren't they ? */ + } +} + +/* + * Three-way compare function + */ +static int compare_anchors(void *l, + void *r) +{ + const char *a = ((HTChildAnchor *) l)->tag; + const char *b = ((HTChildAnchor *) r)->tag; + + /* both tags are not NULL */ + +#ifdef CASE_INSENSITIVE_ANCHORS + return strcasecomp(a, b); /* Case insensitive */ +#else + return strcmp(a, b); /* Case sensitive - FM */ +#endif /* CASE_INSENSITIVE_ANCHORS */ +} + +/* Create new or find old sub-anchor + * --------------------------------- + * + * This one is for a named child. + * The parent anchor must already exist. + */ +static HTChildAnchor *HTAnchor_findNamedChild(HTParentAnchor0 *parent, + const char *tag) +{ + HTChildAnchor *child; + + if (parent && tag && *tag) { /* TBL */ + if (parent->children) { + /* + * Parent has children. Search them. 
+ */ + HTChildAnchor sample; + + sample.tag = (char *) tag; /* for compare_anchors() only */ + + child = (HTChildAnchor *) HTBTree_search(parent->children, &sample); + if (child != NULL) { + CTRACE((tfp, + "Child anchor %p of parent %p with name `%s' already exists.\n", + (void *) child, (void *) parent, tag)); + return (child); + } + } else { /* parent doesn't have any children yet : create family */ + parent->children = HTBTree_new(compare_anchors); + } + + child = HTChildAnchor_new(parent); + CTRACE((tfp, "HTAnchor: New Anchor %p named `%s' is child of %p\n", + (void *) child, + NonNull(tag), + (void *) child->parent)); + + StrAllocCopy(child->tag, tag); /* should be set before HTBTree_add */ + HTBTree_add(parent->children, child); + return (child); + + } else { + CTRACE((tfp, "HTAnchor_findNamedChild called with NULL parent.\n")); + return (NULL); + } + +} + +/* + * This one is for a new unnamed child being edited into an existing + * document. The parent anchor and the document must already exist. + * (Just add new unnamed child). + */ +static HTChildAnchor *HTAnchor_addChild(HTParentAnchor *parent) +{ + HTChildAnchor *child; + + if (!parent) { + CTRACE((tfp, "HTAnchor_addChild called with NULL parent.\n")); + return (NULL); + } + + child = HText_pool_ChildAnchor_new(parent); + CTRACE((tfp, "HTAnchor: New unnamed Anchor %p is child of %p\n", + (void *) child, + (void *) child->parent)); + + child->tag = 0; + HTList_linkObject(&parent->children_notag, child, &child->_add_children_notag); + + return (child); +} + +static HTParentAnchor0 *HTAnchor_findAddress_in_adult_table(const DocAddress *newdoc); + +static BOOL HTAnchor_link(HTChildAnchor *child, + HTAnchor * destination, + HTLinkType *type); + +/* Create or find a child anchor with a possible link + * -------------------------------------------------- + * + * Create new anchor with a given parent and possibly + * a name, and possibly a link to a _relatively_ named anchor. 
+ * (Code originally in ParseHTML.h) + */ +HTChildAnchor *HTAnchor_findChildAndLink(HTParentAnchor *parent, /* May not be 0 */ + const char *tag, /* May be "" or 0 */ + const char *href, /* May be "" or 0 */ + HTLinkType *ltype) /* May be 0 */ +{ + HTChildAnchor *child; + + CTRACE((tfp, "Entered HTAnchor_findChildAndLink: tag=`%s',%s href=`%s'\n", + NonNull(tag), + (ltype == HTInternalLink) ? " (internal link)" : "", + NonNull(href))); + + if (tag && *tag) { + child = HTAnchor_findNamedChild(parent->parent, tag); + } else { + child = HTAnchor_addChild(parent); + } + + if (href && *href) { + const char *fragment = NULL; + HTParentAnchor0 *dest; + + if (ltype == HTInternalLink && *href == '#') { + dest = parent->parent; + } else { + const char *relative_to = ((parent->inBASE && *href != '#') + ? parent->content_base + : parent->address); + DocAddress parsed_doc; + + parsed_doc.address = HTParse(href, relative_to, + PARSE_ALL_WITHOUT_ANCHOR); + + parsed_doc.post_data = NULL; + parsed_doc.post_content_type = NULL; + if (ltype && parent->post_data && ltype == HTInternalLink) { + /* for internal links, find a destination with the same + post data if the source of the link has post data. - kw + Example: LYNXIMGMAP: */ + parsed_doc.post_data = parent->post_data; + parsed_doc.post_content_type = parent->post_content_type; + } + parsed_doc.bookmark = NULL; + parsed_doc.isHEAD = FALSE; + parsed_doc.safe = FALSE; + + dest = HTAnchor_findAddress_in_adult_table(&parsed_doc); + FREE(parsed_doc.address); + } + + /* + * [from HTAnchor_findAddress()] + * If the address represents a sub-anchor, we load its parent (above), + * then we create a named child anchor within that parent. + */ + fragment = (*href == '#') ? 
href + 1 : HTParseAnchor(href); + + if (*fragment) + dest = (HTParentAnchor0 *) HTAnchor_findNamedChild(dest, fragment); + + if (tag && *tag) { + if (child->dest) { /* DUPLICATE_ANCHOR_NAME_WORKAROUND - kw */ + CTRACE((tfp, + "*** Duplicate ChildAnchor %p named `%s'", + (void *) child, tag)); + if ((HTAnchor *) dest != child->dest || ltype != child->type) { + CTRACE((tfp, + ", different dest %p or type, creating unnamed child\n", + (void *) child->dest)); + child = HTAnchor_addChild(parent); + } + } + } + HTAnchor_link(child, (HTAnchor *) dest, ltype); + } + return child; +} + +/* Create new or find old parent anchor + * ------------------------------------ + * + * Me one is for a reference which is found in a document, and might + * not be already loaded. + * Note: You are not guaranteed a new anchor -- you might get an old one, + * like with fonts. + */ +HTParentAnchor *HTAnchor_findAddress(const DocAddress *newdoc) +{ + /* Anchor tag specified ? */ + const char *tag = HTParseAnchor(newdoc->address); + + CTRACE((tfp, "Entered HTAnchor_findAddress\n")); + + /* + * If the address represents a sub-anchor, we load its parent, then we + * create a named child anchor within that parent. + */ + if (*tag) { + DocAddress parsed_doc; + HTParentAnchor0 *foundParent; + + parsed_doc.address = HTParse(newdoc->address, "", + PARSE_ALL_WITHOUT_ANCHOR); + parsed_doc.post_data = newdoc->post_data; + parsed_doc.post_content_type = newdoc->post_content_type; + parsed_doc.bookmark = newdoc->bookmark; + parsed_doc.isHEAD = newdoc->isHEAD; + parsed_doc.safe = newdoc->safe; + + foundParent = HTAnchor_findAddress_in_adult_table(&parsed_doc); + (void) HTAnchor_findNamedChild(foundParent, tag); + FREE(parsed_doc.address); + return HTAnchor_parent((HTAnchor *) foundParent); + } + return HTAnchor_parent((HTAnchor *) HTAnchor_findAddress_in_adult_table(newdoc)); +} + +/* The address has no anchor tag, for sure. 
+ */ +static HTParentAnchor0 *HTAnchor_findAddress_in_adult_table(const DocAddress *newdoc) +{ + /* + * Check whether we have this node. + */ + HASH_TYPE hash; + HTList *adults; + HTList *grownups; + HTParentAnchor0 *foundAnchor; + BOOL need_extra_info = (BOOL) (newdoc->post_data || + newdoc->post_content_type || + newdoc->bookmark || + newdoc->isHEAD || + newdoc->safe); + + /* + * We need not free adult_table[] atexit - it should be perfectly empty + * after free'ing all HText's. (There is an error if it is not empty at + * exit). -LP + */ + + /* + * Select list from hash table, + */ + hash = HASH_FUNCTION(newdoc->address); + adults = &(adult_table[hash]); + + /* + * Search list for anchor. + */ + grownups = adults; + while (NULL != (foundAnchor = + (HTParentAnchor0 *) HTList_nextObject(grownups))) { + if (HTSEquivalent(foundAnchor->address, newdoc->address) && + + ((!foundAnchor->info && !need_extra_info) || + (foundAnchor->info && + HTBEquivalent(foundAnchor->info->post_data, newdoc->post_data) && + foundAnchor->info->isHEAD == newdoc->isHEAD))) { + CTRACE((tfp, "Anchor %p with address `%s' already exists.\n", + (void *) foundAnchor, newdoc->address)); + return foundAnchor; + } + } + + /* + * Node not found: create new anchor. 
+ */ + foundAnchor = HTParentAnchor0_new(newdoc->address, hash); + CTRACE((tfp, "New anchor %p has hash %d and address `%s'\n", + (void *) foundAnchor, hash, newdoc->address)); + + if (need_extra_info) { + /* rare case, create a big structure */ + HTParentAnchor *p = HTParentAnchor_new(foundAnchor); + + if (newdoc->post_data) + BStrCopy(p->post_data, newdoc->post_data); + if (newdoc->post_content_type) + StrAllocCopy(p->post_content_type, + newdoc->post_content_type); + if (newdoc->bookmark) + StrAllocCopy(p->bookmark, newdoc->bookmark); + p->isHEAD = newdoc->isHEAD; + p->safe = newdoc->safe; + } + HTList_linkObject(adults, foundAnchor, &foundAnchor->_add_adult); + + return foundAnchor; +} + +/* Create new or find old named anchor - simple form + * ------------------------------------------------- + * + * Like HTAnchor_findAddress, but simpler to use for simple cases. + * No post data etc. can be supplied. - kw + */ +HTParentAnchor *HTAnchor_findSimpleAddress(const char *url) +{ + DocAddress urldoc; + + urldoc.address = (char *) url; /* ignore warning, it IS treated like const - kw */ + urldoc.post_data = NULL; + urldoc.post_content_type = NULL; + urldoc.bookmark = NULL; + urldoc.isHEAD = FALSE; + urldoc.safe = FALSE; + return HTAnchor_findAddress(&urldoc); +} + +/* Link me Anchor to another given one + * ------------------------------------- + */ +static BOOL HTAnchor_link(HTChildAnchor *child, + HTAnchor * destination, + HTLinkType *type) +{ + if (!(child && destination)) + return (NO); /* Can't link to/from non-existing anchor */ + + CTRACE((tfp, "Linking child %p to anchor %p\n", (void *) child, (void *) destination)); + if (child->dest) { + CTRACE((tfp, "*** child anchor already has destination, exiting!\n")); + return (NO); + } + + child->dest = destination; + child->type = type; + + if (child->parent != destination->parent) + /* link only foreign children */ + HTList_linkObject(&destination->parent->sources, child, &child->_add_sources); + + return (YES); /* 
Success */ +} + +/* Delete an anchor and possibly related things (auto garbage collection) + * -------------------------------------------- + * + * The anchor is only deleted if the corresponding document is not loaded. + * All outgoing links from children are deleted, and children are + * removed from the sources lists of theirs targets. + * We also try to delete the targets whose documents are not loaded. + * If this anchor's sources list is empty, we delete it and its children. + */ + +/* + * Recursively try to delete destination anchor of this child. + * In any event, this will tell destination anchor that we + * no longer consider it a destination. + */ +static void deleteLinks(HTChildAnchor *me) +{ + /* + * Unregister me with our destination anchor's parent. + */ + if (me->dest) { + HTParentAnchor0 *parent = me->dest->parent; + + /* + * Start. Set the dest pointer to zero. + */ + me->dest = NULL; + + /* + * Remove me from the parent's sources so that the parent knows one + * less anchor is its dest. + */ + if ((me->parent != parent) && !HTList_isEmpty(&parent->sources)) { + /* + * Really should only need to deregister once. + */ + HTList_unlinkObject(&parent->sources, (void *) me); + } + + /* + * Recursive call. Test here to avoid calling overhead. Don't delete + * if document is loaded or being loaded. + */ + if ((me->parent != parent) && + parent != NULL && + !parent->underway && + (!parent->info || !parent->info->document)) { + HTAnchor_delete(parent); + } + + /* + * At this point, we haven't a destination. Set it to be so. Leave + * the HTAtom pointed to by type up to other code to handle (reusable, + * near static). + */ + me->type = NULL; + } +} + +static void HTParentAnchor_free(HTParentAnchor *me); + +BOOL HTAnchor_delete(HTParentAnchor0 *me) +{ + /* + * Memory leaks fixed. + * 05-27-94 Lynx 2-3-1 Garrett Arch Blythe + */ + HTBTElement *ele; + HTChildAnchor *child; + + /* + * Do nothing if nothing to do. 
+ */ + if (!me) { + return (NO); + } + + /* + * Don't delete if document is loaded or being loaded. + */ + if (me->underway || (me->info && me->info->document)) { + return (NO); + } + + /* + * Mark ourselves busy, so that recursive calls of this function on this + * HTParentAnchor0 will not free it from under our feet. - kw + */ + me->underway = TRUE; + + { + /* + * Delete all outgoing links from named children. Do not delete named + * children itself (may have incoming links). + */ + if (me->children) { + ele = HTBTree_next(me->children, NULL); + while (ele != NULL) { + child = (HTChildAnchor *) HTBTree_object(ele); + if (child->dest) + deleteLinks(child); + ele = HTBTree_next(me->children, ele); + } + } + } + me->underway = FALSE; + + /* + * There are still incoming links to this one (we are the + * destination of another anchor). + */ + if (!HTList_isEmpty(&me->sources)) { + /* + * Can't delete parent, still have sources. + */ + return (NO); + } + + /* + * No more incoming and outgoing links : kill everything First, delete + * named children. + */ + if (me->children) { + ele = HTBTree_next(me->children, NULL); + while (ele != NULL) { + child = (HTChildAnchor *) HTBTree_object(ele); + FREE(child->tag); + FREE(child); + ele = HTBTree_next(me->children, ele); + } + HTBTree_free(me->children); + } + + /* + * Delete the ParentAnchor, if any. (Document was already deleted). + */ + if (me->info) { + HTParentAnchor_free(me->info); + FREE(me->info); + } + + /* + * Remove ourselves from the hash table's list. + */ + HTList_unlinkObject(&(adult_table[me->adult_hash]), (void *) me); + + /* + * Free the address. + */ + FREE(me->address); + + /* + * Finally, kill the parent anchor passed in. + */ + FREE(me); + + return (YES); +} + +/* + * Unnamed children (children_notag) have no sence without HText - delete them + * and their links if we are about to free HText. Document currently exists. + * Called within HText_free(). 
+ */ +void HTAnchor_delete_links(HTParentAnchor *me) +{ + HTList *cur; + HTChildAnchor *child; + + /* + * Do nothing if nothing to do. + */ + if (!me || !me->document) { + return; + } + + /* + * Mark ourselves busy, so that recursive calls on this HTParentAnchor0 + * will not free it from under our feet. - kw + */ + me->parent->underway = TRUE; + + /* + * Delete all outgoing links from unnamed children. + */ + if (!HTList_isEmpty(&me->children_notag)) { + cur = &me->children_notag; + while ((child = + (HTChildAnchor *) HTList_unlinkLastObject(cur)) != 0) { + deleteLinks(child); + /* child allocated in HText pool, HText_free() will free it later */ + } + } + me->parent->underway = FALSE; +} + +static void HTParentAnchor_free(HTParentAnchor *me) +{ + /* + * Delete the methods list. + */ + if (me->methods) { + /* + * Leave what the methods point to up in memory for other code (near + * static stuff). + */ + HTList_delete(me->methods); + me->methods = NULL; + } + + /* + * Free up all allocated members. 
+ */ + FREE(me->charset); + FREE(me->isIndexAction); + FREE(me->isIndexPrompt); + FREE(me->title); + FREE(me->physical); + BStrFree(me->post_data); + FREE(me->post_content_type); + FREE(me->bookmark); + FREE(me->owner); + FREE(me->RevTitle); + FREE(me->citehost); +#ifdef USE_SOURCE_CACHE + HTAnchor_clearSourceCache(me); +#endif + if (me->FileCache) { + FILE *fd; + + if ((fd = fopen(me->FileCache, "r")) != NULL) { + fclose(fd); + remove(me->FileCache); + } + FREE(me->FileCache); + } + FREE(me->SugFname); + FREE(me->cache_control); +#ifdef EXP_HTTP_HEADERS + HTChunkClear(&(me->http_headers)); +#endif + FREE(me->content_type_params); + FREE(me->content_type); + FREE(me->content_language); + FREE(me->content_encoding); + FREE(me->content_base); + FREE(me->content_disposition); + FREE(me->content_location); + FREE(me->content_md5); + FREE(me->message_id); + FREE(me->subject); + FREE(me->date); + FREE(me->expires); + + FREE(me->last_modified); + FREE(me->ETag); + FREE(me->server); +#ifdef USE_COLOR_STYLE + FREE(me->style); +#endif + + /* + * Original code wanted a way to clean out the HTFormat if no longer needed + * (ref count?). I'll leave it alone since those HTAtom objects are a + * little harder to know where they are being referenced all at one time. + * (near static) + */ + + FREE(me->UCStages); + ImageMapList_free(me->imaps); +} + +#ifdef USE_SOURCE_CACHE +void HTAnchor_clearSourceCache(HTParentAnchor *me) +{ + /* + * Clean up the source cache, if any. 
+ */ + if (me->source_cache_file) { + CTRACE((tfp, "SourceCache: Removing file %s\n", + me->source_cache_file)); + LYRemoveTemp(me->source_cache_file); + FREE(me->source_cache_file); + } + if (me->source_cache_chunk) { + CTRACE((tfp, "SourceCache: Removing memory chunk %p\n", + (void *) me->source_cache_chunk)); + HTChunkFree(me->source_cache_chunk); + me->source_cache_chunk = NULL; + } +} +#endif /* USE_SOURCE_CACHE */ + +/* Data access functions + * --------------------- + */ +HTParentAnchor *HTAnchor_parent(HTAnchor * me) +{ + if (!me) + return NULL; + + if (me->parent->info) + return me->parent->info; + + /* else: create a new structure */ + return HTParentAnchor_new(me->parent); +} + +void HTAnchor_setDocument(HTParentAnchor *me, + HyperDoc *doc) +{ + if (me) + me->document = doc; +} + +HyperDoc *HTAnchor_document(HTParentAnchor *me) +{ + return (me ? me->document : NULL); +} + +char *HTAnchor_address(HTAnchor * me) +{ + char *addr = NULL; + + if (me) { + if (((HTParentAnchor0 *) me == me->parent) || + ((HTParentAnchor *) me == me->parent->info) || + !((HTChildAnchor *) me)->tag) { /* it's an adult or no tag */ + StrAllocCopy(addr, me->parent->address); + } else { /* it's a named child */ + HTSprintf0(&addr, "%s#%s", + me->parent->address, ((HTChildAnchor *) me)->tag); + } + } + return (addr); +} + +void HTAnchor_setFormat(HTParentAnchor *me, + HTFormat form) +{ + if (me) + me->format = form; +} + +HTFormat HTAnchor_format(HTParentAnchor *me) +{ + return (me ? me->format : NULL); +} + +void HTAnchor_setIndex(HTParentAnchor *me, + const char *address) +{ + if (me) { + me->isIndex = YES; + StrAllocCopy(me->isIndexAction, address); + } +} + +void HTAnchor_setPrompt(HTParentAnchor *me, + const char *prompt) +{ + if (me) { + StrAllocCopy(me->isIndexPrompt, prompt); + } +} + +BOOL HTAnchor_isIndex(HTParentAnchor *me) +{ + return (BOOL) (me + ? 
me->isIndex + : NO); +} + +/* Whether Anchor has been designated as an ISMAP link + * (normally by presence of an ISMAP attribute on A or IMG) - KW + */ +BOOL HTAnchor_isISMAPScript(HTAnchor * me) +{ + return (BOOL) ((me && me->parent->info) + ? me->parent->info->isISMAPScript + : NO); +} + +#if defined(USE_COLOR_STYLE) +/* Style handling. +*/ +const char *HTAnchor_style(HTParentAnchor *me) +{ + return (me ? me->style : NULL); +} + +void HTAnchor_setStyle(HTParentAnchor *me, + const char *style) +{ + if (me) { + StrAllocCopy(me->style, style); + } +} +#endif + +/* Title handling. +*/ +const char *HTAnchor_title(HTParentAnchor *me) +{ + return (me ? me->title : NULL); +} + +void HTAnchor_setTitle(HTParentAnchor *me, + const char *title) +{ + int i; + + if (me) { + if (title) { + StrAllocCopy(me->title, title); + for (i = 0; me->title[i]; i++) { + if (UCH(me->title[i]) == 1 || + UCH(me->title[i]) == 2) { + me->title[i] = ' '; + } + } + } else { + CTRACE((tfp, "HTAnchor_setTitle: New title is NULL! ")); + if (me->title) { + CTRACE((tfp, "Old title was \"%s\".\n", me->title)); + FREE(me->title); + } else { + CTRACE((tfp, "Old title was NULL.\n")); + } + } + } +} + +void HTAnchor_appendTitle(HTParentAnchor *me, + const char *title) +{ + int i; + + if (me) { + StrAllocCat(me->title, title); + for (i = 0; me->title[i]; i++) { + if (UCH(me->title[i]) == 1 || + UCH(me->title[i]) == 2) { + me->title[i] = ' '; + } + } + } +} + +/* Bookmark handling. +*/ +const char *HTAnchor_bookmark(HTParentAnchor *me) +{ + return (me ? me->bookmark : NULL); +} + +void HTAnchor_setBookmark(HTParentAnchor *me, + const char *bookmark) +{ + if (me) + StrAllocCopy(me->bookmark, bookmark); +} + +/* Owner handling. +*/ +const char *HTAnchor_owner(HTParentAnchor *me) +{ + return (me ? me->owner : NULL); +} + +void HTAnchor_setOwner(HTParentAnchor *me, + const char *owner) +{ + if (me) { + StrAllocCopy(me->owner, owner); + } +} + +/* TITLE handling in LINKs with REV="made" or REV="owner". 
- FM +*/ +const char *HTAnchor_RevTitle(HTParentAnchor *me) +{ + return (me ? me->RevTitle : NULL); +} + +void HTAnchor_setRevTitle(HTParentAnchor *me, + const char *title) +{ + int i; + + if (me) { + StrAllocCopy(me->RevTitle, title); + for (i = 0; me->RevTitle[i]; i++) { + if (UCH(me->RevTitle[i]) == 1 || + UCH(me->RevTitle[i]) == 2) { + me->RevTitle[i] = ' '; + } + } + } +} + +#ifndef DISABLE_BIBP +/* Citehost for bibp links from LINKs with REL="citehost". - RDC +*/ +const char *HTAnchor_citehost(HTParentAnchor *me) +{ + return (me ? me->citehost : NULL); +} + +void HTAnchor_setCitehost(HTParentAnchor *me, + const char *citehost) +{ + if (me) { + StrAllocCopy(me->citehost, citehost); + } +} +#endif /* !DISABLE_BIBP */ + +/* Suggested filename handling. - FM + * (will be loaded if we had a Content-Disposition + * header or META element with filename=name.suffix) + */ +const char *HTAnchor_SugFname(HTParentAnchor *me) +{ + return (me ? me->SugFname : NULL); +} + +#ifdef EXP_HTTP_HEADERS +/* HTTP Headers. +*/ +const char *HTAnchor_http_headers(HTParentAnchor *me) +{ + return (me ? me->http_headers.data : NULL); +} +#endif + +/* Content-Type handling (parameter list). +*/ +const char *HTAnchor_content_type_params(HTParentAnchor *me) +{ + return (me ? me->content_type_params : NULL); +} + +/* Content-Encoding handling. - FM + * (will be loaded if we had a Content-Encoding + * header.) + */ +const char *HTAnchor_content_encoding(HTParentAnchor *me) +{ + return (me ? me->content_encoding : NULL); +} + +/* Content-Type handling. - FM +*/ +const char *HTAnchor_content_type(HTParentAnchor *me) +{ + return (me ? me->content_type : NULL); +} + +/* Last-Modified header handling. - FM +*/ +const char *HTAnchor_last_modified(HTParentAnchor *me) +{ + return (me ? me->last_modified : NULL); +} + +/* Date header handling. - FM +*/ +const char *HTAnchor_date(HTParentAnchor *me) +{ + return (me ? me->date : NULL); +} + +/* Server header handling. 
- FM +*/ +const char *HTAnchor_server(HTParentAnchor *me) +{ + return (me ? me->server : NULL); +} + +/* Safe header handling. - FM +*/ +BOOL HTAnchor_safe(HTParentAnchor *me) +{ + return (BOOL) (me ? me->safe : FALSE); +} + +/* Content-Base header handling. - FM +*/ +const char *HTAnchor_content_base(HTParentAnchor *me) +{ + return (me ? me->content_base : NULL); +} + +/* Content-Location header handling. - FM +*/ +const char *HTAnchor_content_location(HTParentAnchor *me) +{ + return (me ? me->content_location : NULL); +} + +/* Message-ID, used for mail replies - kw +*/ +const char *HTAnchor_messageID(HTParentAnchor *me) +{ + return (me ? me->message_id : NULL); +} + +BOOL HTAnchor_setMessageID(HTParentAnchor *me, + const char *messageid) +{ + if (!(me && messageid && *messageid)) { + return FALSE; + } + StrAllocCopy(me->message_id, messageid); + return TRUE; +} + +/* Subject, used for mail replies - kw +*/ +const char *HTAnchor_subject(HTParentAnchor *me) +{ + return (me ? me->subject : NULL); +} + +BOOL HTAnchor_setSubject(HTParentAnchor *me, + const char *subject) +{ + if (!(me && subject && *subject)) { + return FALSE; + } + StrAllocCopy(me->subject, subject); + return TRUE; +} + +/* Manipulation of links + * --------------------- + */ +HTAnchor *HTAnchor_followLink(HTChildAnchor *me) +{ + return (me->dest); +} + +HTAnchor *HTAnchor_followTypedLink(HTChildAnchor *me, + HTLinkType *type) +{ + if (me->type == type) + return (me->dest); + return (NULL); /* No link of me type */ +} + +/* Methods List + * ------------ + */ +HTList *HTAnchor_methods(HTParentAnchor *me) +{ + if (!me->methods) { + me->methods = HTList_new(); + } + return (me->methods); +} + +/* Protocol + * -------- + */ +void *HTAnchor_protocol(HTParentAnchor *me) +{ + return (me->protocol); +} + +void HTAnchor_setProtocol(HTParentAnchor *me, + void *protocol) +{ + me->protocol = protocol; +} + +/* Physical Address + * ---------------- + */ +char *HTAnchor_physical(HTParentAnchor *me) +{ + return 
(me->physical); +} + +void HTAnchor_setPhysical(HTParentAnchor *me, + char *physical) +{ + if (me) { + StrAllocCopy(me->physical, physical); + } +} + +#ifdef DEBUG +static void show_stages(HTParentAnchor *me, const char *tag, int which_stage) +{ + int j; + + CTRACE((tfp, "Stages %s*%s", NonNull(me->charset), tag)); + for (j = 0; j < UCT_STAGEMAX; j++) { + CTRACE((tfp, " ")); + if (j == which_stage) + CTRACE((tfp, "(")); + CTRACE((tfp, "%d:%d:%s", + j, + me->UCStages->s[j].LYhndl, + NonNull(me->UCStages->s[j].C.MIMEname))); + if (j == which_stage) + CTRACE((tfp, ")")); + } + CTRACE((tfp, "\n")); +} +#else +#define show_stages(me,tag,which_stage) /* nothing */ +#endif + +/* + * We store charset info in the HTParentAnchor object, for several + * "stages". (See UCDefs.h) + * A stream method is supposed to know what stage in the model it is. + * + * General model MIME -> parser -> structured -> HText + * e.g., text/html + * from HTTP: HTMIME.c -> SGML.c -> HTML.c -> GridText.c + * text/plain + * from file: HTFile.c -> HTPlain.c -> GridText.c + * + * The lock/set_by is used to lock e.g. a charset set by an explicit + * HTTP MIME header against overriding by a HTML META tag - the MIME + * header has higher priority. Defaults (from -assume_.. options etc.) + * will not override charset explicitly given by server. + * + * Some advantages of keeping this in the HTAnchor: + * - Global variables are bad. + * - Can remember a charset given by META tag when toggling to SOURCE view. + * - Can remember a charset given by <A CHARSET=...> href in another doc. + * + * We don't modify the HTParentAnchor's charset element + * here, that one will only be set when explicitly given. 
+ */ +LYUCcharset *HTAnchor_getUCInfoStage(HTParentAnchor *me, + int which_stage) +{ + LYUCcharset *result = NULL; + + if (me) { + if (!me->UCStages) { + int i; + int chndl = UCLYhndl_for_unspec; /* always >= 0 */ + UCAnchorInfo *stages = typecalloc(UCAnchorInfo); + + if (stages == NULL) + outofmem(__FILE__, "HTAnchor_getUCInfoStage"); + + assert(stages != NULL); + + for (i = 0; i < UCT_STAGEMAX; i++) { + stages->s[i].C.MIMEname = ""; + stages->s[i].LYhndl = -1; + } + if (me->charset) { + chndl = UCGetLYhndl_byMIME(me->charset); + if (chndl < 0) + chndl = UCLYhndl_for_unrec; + if (chndl < 0) + /* + * UCLYhndl_for_unrec not defined :-( + * fallback to UCLYhndl_for_unspec which always valid. + */ + chndl = UCLYhndl_for_unspec; /* always >= 0 */ + } + MemCpy(&stages->s[UCT_STAGE_MIME].C, &LYCharSet_UC[chndl], + sizeof(LYUCcharset)); + + stages->s[UCT_STAGE_MIME].lock = UCT_SETBY_DEFAULT; + stages->s[UCT_STAGE_MIME].LYhndl = chndl; + me->UCStages = stages; + } + result = (&me->UCStages->s[which_stage].C); + show_stages(me, "_getUCInfoStage", which_stage); + } + return (result); +} + +int HTAnchor_getUCLYhndl(HTParentAnchor *me, + int which_stage) +{ + if (me) { + if (!me->UCStages) { + /* + * This will allocate and initialize, if not yet done. + */ + (void) HTAnchor_getUCInfoStage(me, which_stage); + } + if (me->UCStages->s[which_stage].lock > UCT_SETBY_NONE) { + return (me->UCStages->s[which_stage].LYhndl); + } + } + return (-1); +} + +#ifdef CAN_SWITCH_DISPLAY_CHARSET +static void setup_switch_display_charset(HTParentAnchor *me, int h) +{ + if (!Switch_Display_Charset(h, SWITCH_DISPLAY_CHARSET_MAYBE)) + return; + HTAnchor_setUCInfoStage(me, current_char_set, + UCT_STAGE_HTEXT, UCT_SETBY_MIME); /* highest priorty! */ + HTAnchor_setUCInfoStage(me, current_char_set, + UCT_STAGE_STRUCTURED, UCT_SETBY_MIME); /* highest priorty! 
*/ + CTRACE((tfp, + "changing UCInfoStage: HTEXT/STRUCTURED stages charset='%s'.\n", + LYCharSet_UC[current_char_set].MIMEname)); +} +#endif + +LYUCcharset *HTAnchor_setUCInfoStage(HTParentAnchor *me, + int LYhndl, + int which_stage, + int set_by) +{ + if (me) { + /* + * This will allocate and initialize, if not yet done. + */ + LYUCcharset *p = HTAnchor_getUCInfoStage(me, which_stage); + + /* + * Can we override? + */ + if (set_by >= me->UCStages->s[which_stage].lock) { +#ifdef CAN_SWITCH_DISPLAY_CHARSET + int ohandle = me->UCStages->s[which_stage].LYhndl; +#endif + me->UCStages->s[which_stage].lock = set_by; + me->UCStages->s[which_stage].LYhndl = LYhndl; + if (LYhndl >= 0) { + MemCpy(p, &LYCharSet_UC[LYhndl], sizeof(LYUCcharset)); + +#ifdef CAN_SWITCH_DISPLAY_CHARSET + /* Allow a switch to a more suitable display charset */ + if (LYhndl != ohandle && which_stage == UCT_STAGE_PARSER) + setup_switch_display_charset(me, LYhndl); +#endif + } else { + p->UChndl = -1; + } + show_stages(me, "_setUCInfoStage", which_stage); + return (p); + } + } + return (NULL); +} + +LYUCcharset *HTAnchor_resetUCInfoStage(HTParentAnchor *me, + int LYhndl, + int which_stage, + int set_by) +{ + LYUCcharset *result = NULL; + int ohandle; + + if (me && me->UCStages) { + me->UCStages->s[which_stage].lock = set_by; + ohandle = me->UCStages->s[which_stage].LYhndl; + me->UCStages->s[which_stage].LYhndl = LYhndl; +#ifdef CAN_SWITCH_DISPLAY_CHARSET + /* Allow a switch to a more suitable display charset */ + if (LYhndl >= 0 && LYhndl != ohandle + && which_stage == UCT_STAGE_PARSER) + setup_switch_display_charset(me, LYhndl); +#else + (void) ohandle; +#endif + show_stages(me, "_resetUCInfoStage", which_stage); + result = (&me->UCStages->s[which_stage].C); + } + return result; +} + +/* + * A set_by of (-1) means use the lock value from the from_stage. 
+ */ +LYUCcharset *HTAnchor_copyUCInfoStage(HTParentAnchor *me, + int to_stage, + int from_stage, + int set_by) +{ + if (me) { + /* + * This will allocate and initialize, if not yet done. + */ + LYUCcharset *p_from = HTAnchor_getUCInfoStage(me, from_stage); + LYUCcharset *p_to = HTAnchor_getUCInfoStage(me, to_stage); + + /* + * Can we override? + */ + if (set_by == -1) + set_by = me->UCStages->s[from_stage].lock; + if (set_by == UCT_SETBY_NONE) + set_by = UCT_SETBY_DEFAULT; + if (set_by >= me->UCStages->s[to_stage].lock) { +#ifdef CAN_SWITCH_DISPLAY_CHARSET + int ohandle = me->UCStages->s[to_stage].LYhndl; +#endif + me->UCStages->s[to_stage].lock = set_by; + me->UCStages->s[to_stage].LYhndl = + me->UCStages->s[from_stage].LYhndl; +#ifdef CAN_SWITCH_DISPLAY_CHARSET + /* Allow a switch to a more suitable display charset */ + if (me->UCStages->s[to_stage].LYhndl >= 0 + && me->UCStages->s[to_stage].LYhndl != ohandle + && to_stage == UCT_STAGE_PARSER) + setup_switch_display_charset(me, + me->UCStages->s[to_stage].LYhndl); +#endif + if (p_to != p_from) + MemCpy(p_to, p_from, sizeof(LYUCcharset)); + + return (p_to); + } + } + return (NULL); +} diff --git a/WWW/Library/Implementation/HTAnchor.h b/WWW/Library/Implementation/HTAnchor.h new file mode 100644 index 00000000..43b64b90 --- /dev/null +++ b/WWW/Library/Implementation/HTAnchor.h @@ -0,0 +1,408 @@ +/* + * $LynxId: HTAnchor.h,v 1.34 2010/06/17 00:13:37 tom Exp $ + * + * Hypertext "Anchor" Object HTAnchor.h + * ========================== + * + * An anchor represents a region of a hypertext document which is linked + * to another anchor in the same or a different document. 
+ */ + +#ifndef HTANCHOR_H +#define HTANCHOR_H + +/* Version 0 (TBL) written in Objective-C for the NeXT browser */ +/* Version 1 of 24-Oct-1991 (JFG), written in C, browser-independent */ + +#include <HTList.h> +#include <HTBTree.h> +#include <HTChunk.h> +#include <HTAtom.h> +#include <UCDefs.h> + +typedef struct _HyperDoc HyperDoc; /* Ready for forward references */ +typedef struct _HTAnchor HTAnchor; +typedef struct _HTParentAnchor HTParentAnchor; +typedef struct _HTParentAnchor0 HTParentAnchor0; + +#include <HTFormat.h> + +#ifdef __cplusplus +extern "C" { +#endif + struct _HTAnchor { + /* Generic anchor: every anchor struct in this header begins with this same `parent' member */ + HTParentAnchor0 *parent; /* Parent of this anchor (self for adults) */ + }; + +#define HASH_TYPE unsigned short /* integer type of the adult_hash member below */ + + struct _HTParentAnchor0 { /* One for adult_table, + * generally not used outside HTAnchor.c */ + /* Common part from the generic anchor structure */ + HTParentAnchor0 *parent; /* (self) */ + + /* ParentAnchor0-specific information */ + char *address; /* Absolute address of this node */ + HTParentAnchor *info; /* additional info, allocated on demand */ + + HTBTree *children; /* Subanchors <a name="tag">, sorted by tag */ + HTList sources; /* List of anchors pointing to this, if any */ + + HTList _add_adult; /* - just a memory for list entry:) */ + HASH_TYPE adult_hash; /* adult list number */ + BOOL underway; /* Document about to be attached to it */ + }; + + /* + * Separated from the above to save memory: allocated on demand, + * it is nearly 1:1 to HText (well, sometimes without HText...), + * available for SGML, HTML, and HText stages. + * [being precise, we currently allocate it before HTLoadDocument(), + * in HTAnchor_findAddress() and HTAnchor_parent()]. 
+ */ + struct _HTParentAnchor { + /* Common part from the generic anchor structure */ + HTParentAnchor0 *parent; /* Parent of this anchor */ + + /* ParentAnchor-specific information */ + HTList children_notag; /* Subanchors <a href=...>, tag is NULL */ + HyperDoc *document; /* The document within which this is an anchor */ + + char *address; /* parent->address, a pointer */ + bstring *post_data; /* Posting data */ + char *post_content_type; /* Type of post data */ + char *bookmark; /* Bookmark filename */ + HTFormat format; /* Pointer to node format descriptor */ + char *charset; /* Pointer to character set (kludge, for now) */ + BOOL isIndex; /* Acceptance of a keyword search */ + char *isIndexAction; /* URL of isIndex server */ + char *isIndexPrompt; /* Prompt for isIndex query */ + char *title; /* Title of document */ + char *owner; /* Owner of document */ + char *RevTitle; /* TITLE in REV="made" or REV="owner" LINK */ + char *citehost; /* Citehost from REL="citehost" LINK */ +#ifdef USE_COLOR_STYLE + char *style; /* read/written through HTAnchor_style() and HTAnchor_setStyle() */ +#endif + + HTList *methods; /* Methods available as HTAtoms */ + void *protocol; /* Protocol object */ + char *physical; /* Physical address */ + BOOL isISMAPScript; /* Script for clickable image map */ + BOOL isHEAD; /* Document is headers from a HEAD request */ + BOOL safe; /* Safe */ +#ifdef USE_SOURCE_CACHE + char *source_cache_file; + HTChunk *source_cache_chunk; +#endif + char *FileCache; /* Path to a disk-cached copy (see src/HTFWriter.c) */ + char *SugFname; /* Suggested filename */ + char *cache_control; /* Cache-Control */ + BOOL no_cache; /* Cache-Control, Pragma or META "no-cache"? 
*/ + BOOL inBASE; /* duplicated from HTStructured (HTML.c/h) */ +#ifdef EXP_HTTP_HEADERS + HTChunk http_headers; /* its .data is what HTAnchor_http_headers() returns */ +#endif + char *content_type_params; /* Content-Type (with parameters if any) */ + char *content_type; /* Content-Type */ + char *content_language; /* Content-Language */ + char *content_encoding; /* Compression algorithm */ + char *content_base; /* Content-Base */ + char *content_disposition; /* Content-Disposition */ + char *content_location; /* Content-Location */ + char *content_md5; /* Content-MD5 */ + char *message_id; /* Message-ID */ + char *subject; /* Subject */ + long content_length; /* Content-Length */ + char *date; /* Date */ + char *expires; /* Expires */ + char *last_modified; /* Last-Modified */ + char *ETag; /* ETag (HTTP1.1 cache validator) */ + char *server; /* Server */ + UCAnchorInfo *UCStages; /* chartrans stages */ + HTList *imaps; /* client side image maps */ + }; + + typedef HTAtom HTLinkType; + + typedef struct { + /* Common part from the generic anchor structure */ + HTParentAnchor0 *parent; /* Parent of this anchor */ + + /* ChildAnchor-specific information */ + char *tag; /* #fragment, relative to the parent */ + + HTAnchor *dest; /* The anchor to which this leads */ + HTLinkType *type; /* Semantics of this link */ + + HTList _add_children_notag; /* - just a memory for list entry:) */ + HTList _add_sources; /* - just a memory for list entry:) */ + } HTChildAnchor; + + /* + * DocAddress structure is used for loading an absolute anchor with all + * needed information including posting data and post content type. + */ + typedef struct _DocAddress { + char *address; + bstring *post_data; + char *post_content_type; + char *bookmark; + BOOL isHEAD; + BOOL safe; + } DocAddress; + + /* "internal" means "within the same document, with certainty". 
*/ + extern HTLinkType *HTInternalLink; + + /* Create or find a child anchor with a possible link + * -------------------------------------------------- + * + * Create new anchor with a given parent and possibly + * a name, and possibly a link to a _relatively_ named anchor. + * (Code originally in ParseHTML.h) + */ + extern HTChildAnchor *HTAnchor_findChildAndLink(HTParentAnchor *parent, /* Must not be 0 */ + const char *tag, /* May be "" or 0 */ + const char *href, /* May be "" or 0 */ + HTLinkType *ltype); /* May be 0 */ + + /* Create new or find old parent anchor + * ------------------------------------ + * + * This one is for a reference which is found in a document, and might + * not be already loaded. + * Note: You are not guaranteed a new anchor -- you might get an old one, + * like with fonts. + */ + extern HTParentAnchor *HTAnchor_findAddress(const DocAddress *address); + + /* Create new or find old named anchor - simple form + * ------------------------------------------------- + * + * Like the previous one, but simpler to use for simple cases. + * No post data etc. can be supplied. - kw + */ + extern HTParentAnchor *HTAnchor_findSimpleAddress(const char *url); + + /* Delete an anchor and possibly related things (auto garbage collection) + * ---------------------------------------------------------------------- + * + * The anchor is only deleted if the corresponding document is not loaded. + * All outgoing links from children are deleted, and children are + * removed from the sources lists of their targets. + * We also try to delete the targets whose documents are not loaded. + * If this anchor's sources list is empty, we delete it and its children. + * + * NOTE(review): the meaning of the BOOL result is not stated here - + * confirm against the definition in HTAnchor.c. + */ + extern BOOL HTAnchor_delete(HTParentAnchor0 *me); + + /* + * Unnamed children (children_notag) make no sense without HText - + * delete them and their links if we are about to free HText. 
+ * Document currently exists. Called within HText_free(). 
+ */ + extern void HTAnchor_delete_links(HTParentAnchor *me); + +#ifdef USE_SOURCE_CACHE + extern void HTAnchor_clearSourceCache(HTParentAnchor *me); +#endif + + /* Data access functions + * --------------------- + */ + extern HTParentAnchor *HTAnchor_parent(HTAnchor * me); + + extern void HTAnchor_setDocument(HTParentAnchor *me, + HyperDoc *doc); + + extern HyperDoc *HTAnchor_document(HTParentAnchor *me); + + /* Returns the full URI of the anchor, child or parent + * as a malloc'd string to be freed by the caller. + */ + extern char *HTAnchor_address(HTAnchor * me); + + extern void HTAnchor_setFormat(HTParentAnchor *me, + HTFormat form); + + extern HTFormat HTAnchor_format(HTParentAnchor *me); + + extern void HTAnchor_setIndex(HTParentAnchor *me, + const char *address); + + extern void HTAnchor_setPrompt(HTParentAnchor *me, + const char *prompt); + + extern BOOL HTAnchor_isIndex(HTParentAnchor *me); + + /* NO for a NULL anchor or one whose parent has no info record; + * otherwise that record's isISMAPScript flag. + */ + extern BOOL HTAnchor_isISMAPScript(HTAnchor * me); + +#if defined(USE_COLOR_STYLE) + /* Style handling: the getter returns NULL for a NULL anchor, + * the setter stores a copy of the string. + */ + extern const char *HTAnchor_style(HTParentAnchor *me); + + extern void HTAnchor_setStyle(HTParentAnchor *me, + const char *style); +#endif + + /* Title handling. + * + * setTitle stores a copy of the string with the bytes 1 and 2 + * replaced by blanks; a NULL title releases the stored one. + * appendTitle concatenates and applies the same replacement. + */ + extern const char *HTAnchor_title(HTParentAnchor *me); + + extern void HTAnchor_setTitle(HTParentAnchor *me, + const char *title); + + extern void HTAnchor_appendTitle(HTParentAnchor *me, + const char *title); + + /* Bookmark handling. 
+ */ + extern const char *HTAnchor_bookmark(HTParentAnchor *me); + + extern void HTAnchor_setBookmark(HTParentAnchor *me, + const char *bookmark); + + /* Owner handling. + */ + extern const char *HTAnchor_owner(HTParentAnchor *me); + + extern void HTAnchor_setOwner(HTParentAnchor *me, + const char *owner); + + /* TITLE handling in LINKs with REV="made" or REV="owner". - FM + */ + extern const char *HTAnchor_RevTitle(HTParentAnchor *me); + + extern void HTAnchor_setRevTitle(HTParentAnchor *me, + const char *title); + + /* Citehost for bibp links from LINKs with REL="citehost". 
- RDC + */ + extern const char *HTAnchor_citehost(HTParentAnchor *me); + + extern void HTAnchor_setCitehost(HTParentAnchor *me, + const char *citehost); + + /* Suggested filename handling. - FM + * (will be loaded if we had a Content-Disposition + * header or META element with filename=name.suffix) + */ + extern const char *HTAnchor_SugFname(HTParentAnchor *me); + + /* HTTP Headers. + */ + extern const char *HTAnchor_http_headers(HTParentAnchor *me); + + /* Content-Type handling (parameter list). + */ + extern const char *HTAnchor_content_type_params(HTParentAnchor *me); + + /* Content-Type handling. - FM + */ + extern const char *HTAnchor_content_type(HTParentAnchor *me); + + /* Content-Encoding handling. - FM + * (will be loaded if we had a Content-Encoding + * header.) + */ + extern const char *HTAnchor_content_encoding(HTParentAnchor *me); + + /* Last-Modified header handling. - FM + */ + extern const char *HTAnchor_last_modified(HTParentAnchor *me); + + /* Date header handling. - FM + */ + extern const char *HTAnchor_date(HTParentAnchor *me); + + /* Server header handling. - FM + */ + extern const char *HTAnchor_server(HTParentAnchor *me); + + /* Safe header handling. - FM + */ + extern BOOL HTAnchor_safe(HTParentAnchor *me); + + /* Content-Base header handling. - FM + */ + extern const char *HTAnchor_content_base(HTParentAnchor *me); + + /* Content-Location header handling. 
- FM + */ + extern const char *HTAnchor_content_location(HTParentAnchor *me); + + /* Message-ID, used for mail replies - kw + */ + extern const char *HTAnchor_messageID(HTParentAnchor *me); + + extern BOOL HTAnchor_setMessageID(HTParentAnchor *me, + const char *messageid); + + /* Subject, used for mail replies - kw + */ + extern const char *HTAnchor_subject(HTParentAnchor *me); + + extern BOOL HTAnchor_setSubject(HTParentAnchor *me, + const char *subject); + + /* Manipulation of links + * --------------------- + */ + extern HTAnchor *HTAnchor_followLink(HTChildAnchor *me); + + extern HTAnchor *HTAnchor_followTypedLink(HTChildAnchor *me, + HTLinkType *type); + + /* Read and write methods + * ---------------------- + */ + extern HTList *HTAnchor_methods(HTParentAnchor *me); + + /* Protocol + * -------- + */ + extern void *HTAnchor_protocol(HTParentAnchor *me); + + extern void HTAnchor_setProtocol(HTParentAnchor *me, + void *protocol); + + /* Physical address + * ---------------- + */ + extern char *HTAnchor_physical(HTParentAnchor *me); + + extern void HTAnchor_setPhysical(HTParentAnchor *me, + char *protocol); + + extern LYUCcharset *HTAnchor_getUCInfoStage(HTParentAnchor *me, + int which_stage); + + extern int HTAnchor_getUCLYhndl(HTParentAnchor *me, + int which_stage); + + extern LYUCcharset *HTAnchor_setUCInfoStage(HTParentAnchor *me, + int LYhndl, + int which_stage, + int set_by); + + extern LYUCcharset *HTAnchor_setUCInfoStage(HTParentAnchor *me, + int LYhndl, + int which_stage, + int set_by); + + extern LYUCcharset *HTAnchor_resetUCInfoStage(HTParentAnchor *me, + int LYhndl, + int which_stage, + int set_by); + + extern LYUCcharset *HTAnchor_copyUCInfoStage(HTParentAnchor *me, + int to_stage, + int from_stage, + int set_by); + + extern void ImageMapList_free(HTList *list); + +#ifdef __cplusplus +} +#endif +#endif /* HTANCHOR_H */ diff --git a/WWW/Library/Implementation/HTAssoc.c b/WWW/Library/Implementation/HTAssoc.c new file mode 100644 index 
00000000..22c1126c --- /dev/null +++ b/WWW/Library/Implementation/HTAssoc.c @@ -0,0 +1,84 @@ +/* + * $LynxId: HTAssoc.c,v 1.10 2010/04/29 09:34:03 tom Exp $ + * + * MODULE HTAssoc.c + * ASSOCIATION LIST FOR STORING NAME-VALUE PAIRS. + * NAMES NOT CASE SENSITIVE, AND ONLY COMMON LENGTH + * IS CHECKED (allows abbreviations; well, length is + * taken from lookup-up name, so if table contains + * a shorter abbrev it is not found). + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * + * HISTORY: + * + * + * BUGS: + * + * + */ + +#include <HTUtils.h> + +#include <HTAssoc.h> + +#include <LYLeaks.h> + +HTAssocList *HTAssocList_new(void) +{ + return HTList_new(); +} + +void HTAssocList_delete(HTAssocList *alist) +{ + if (alist) { + HTAssocList *cur = alist; + HTAssoc *assoc; + + while (NULL != (assoc = (HTAssoc *) HTList_nextObject(cur))) { + FREE(assoc->name); + FREE(assoc->value); + FREE(assoc); + } + HTList_delete(alist); + alist = NULL; + } +} + +void HTAssocList_add(HTAssocList *alist, + const char *name, + const char *value) +{ + HTAssoc *assoc; + + if (alist) { + if (!(assoc = (HTAssoc *) malloc(sizeof(HTAssoc)))) + outofmem(__FILE__, "HTAssoc_add"); + + assert(assoc != NULL); + + assoc->name = NULL; + assoc->value = NULL; + + if (name) + StrAllocCopy(assoc->name, name); + if (value) + StrAllocCopy(assoc->value, value); + HTList_addObject(alist, (void *) assoc); + } else { + CTRACE((tfp, "HTAssoc_add: ERROR: assoc list NULL!!\n")); + } +} + +char *HTAssocList_lookup(HTAssocList *alist, + const char *name) +{ + HTAssocList *cur = alist; + HTAssoc *assoc; + + while (NULL != (assoc = (HTAssoc *) HTList_nextObject(cur))) { + if (!strncasecomp(assoc->name, name, (int) strlen(name))) + return assoc->value; + } + return NULL; +} diff --git a/WWW/Library/Implementation/HTAssoc.h b/WWW/Library/Implementation/HTAssoc.h new file mode 100644 index 00000000..327809c2 --- /dev/null +++ b/WWW/Library/Implementation/HTAssoc.h @@ -0,0 +1,35 @@ +/* ASSOCIATION LIST FOR 
STORING NAME-VALUE PAIRS + + Lookups from association list are not case-sensitive. + + */ + +#ifndef HTASSOC_H +#define HTASSOC_H + +#include <HTList.h> + +#ifdef __cplusplus +extern "C" { +#endif + typedef HTList HTAssocList; + + typedef struct { + char *name; + char *value; + } HTAssoc; + + extern HTAssocList *HTAssocList_new(void); + extern void HTAssocList_delete(HTAssocList *alist); + + extern void HTAssocList_add(HTAssocList *alist, + const char *name, + const char *value); + + extern char *HTAssocList_lookup(HTAssocList *alist, + const char *name); + +#ifdef __cplusplus +} +#endif +#endif /* not HTASSOC_H */ diff --git a/WWW/Library/Implementation/HTAtom.c b/WWW/Library/Implementation/HTAtom.c new file mode 100644 index 00000000..d01455ca --- /dev/null +++ b/WWW/Library/Implementation/HTAtom.c @@ -0,0 +1,178 @@ +/* + * $LynxId: HTAtom.c,v 1.18 2010/09/25 11:43:12 tom Exp $ + * + * Atoms: Names to numbers HTAtom.c + * ======================= + * + * Atoms are names which are given representative pointer values + * so that they can be stored more efficiently, and comparisons + * for equality done more efficiently. + * + * Atoms are kept in a hash table consisting of an array of linked lists. + * + * Authors: + * TBL Tim Berners-Lee, WorldWideWeb project, CERN + * (c) Copyright CERN 1991 - See Copyright.html + * + */ + +#include <HTUtils.h> + +#define HASH_SIZE 101 /* Tunable */ +#include <HTAtom.h> + +#include <HTList.h> + +#include <LYexit.h> +#include <LYLeaks.h> + +static HTAtom *hash_table[HASH_SIZE]; +static BOOL initialised = NO; + +/* + * To free off all atoms. + */ +#ifdef LY_FIND_LEAKS +static void free_atoms(void); +#endif + +/* + * Alternate hashing function. + */ +#define HASH_FUNCTION(cp_hash) ((strlen(cp_hash) * UCH(*cp_hash)) % HASH_SIZE) + +HTAtom *HTAtom_for(const char *string) +{ + size_t hash; + HTAtom *a; + + /* First time around, clear hash table + */ + /* + * Memory leak fixed. 
+ * 05-29-94 Lynx 2-3-1 Garrett Arch Blythe + */ + if (!initialised) { + int i; + + for (i = 0; i < HASH_SIZE; i++) + hash_table[i] = (HTAtom *) 0; + initialised = YES; +#ifdef LY_FIND_LEAKS + atexit(free_atoms); +#endif + } + + /* Generate hash function + */ + hash = HASH_FUNCTION(string); + + /* Search for the string in the list + */ + for (a = hash_table[hash]; a; a = a->next) { + if (0 == strcasecomp(a->name, string)) { + /* CTRACE((tfp, "HTAtom: Old atom %p for `%s'\n", a, string)); */ + return a; /* Found: return it */ + } + } + + /* Generate a new entry + */ + a = (HTAtom *) malloc(sizeof(*a)); + if (a == NULL) + outofmem(__FILE__, "HTAtom_for"); + + assert(a != NULL); + + a->name = (char *) malloc(strlen(string) + 1); + if (a->name == NULL) + outofmem(__FILE__, "HTAtom_for"); + + assert(a->name != NULL); + + strcpy(a->name, string); + a->next = hash_table[hash]; /* Put onto the head of list */ + hash_table[hash] = a; +#ifdef NOT_DEFINED + CTRACE((tfp, "HTAtom: New atom %p for `%s'\n", a, string)); +#endif /* NOT_DEFINED */ + return a; +} + +#ifdef LY_FIND_LEAKS +/* + * Purpose: Free off all atoms. + * Arguments: void + * Return Value: void + * Remarks/Portability/Dependencies/Restrictions: + * To be used at program exit. + * Revision History: + * 05-29-94 created Lynx 2-3-1 Garrett Arch Blythe + */ +static void free_atoms(void) +{ + auto int i_counter; + HTAtom *HTAp_freeme; + + /* + * Loop through all lists of atoms. + */ + for (i_counter = 0; i_counter < HASH_SIZE; i_counter++) { + /* + * Loop through the list. + */ + while (hash_table[i_counter] != NULL) { + /* + * Free off atoms and any members. 
+ */ + HTAp_freeme = hash_table[i_counter]; + hash_table[i_counter] = HTAp_freeme->next; + FREE(HTAp_freeme->name); + FREE(HTAp_freeme); + } + } +} +#endif /* LY_FIND_LEAKS */ + +static BOOL mime_match(const char *name, + const char *templ) +{ + if (name && templ) { + static char *n1 = NULL; + static char *t1 = NULL; + char *n2; + char *t2; + + StrAllocCopy(n1, name); /* These also free the ones */ + StrAllocCopy(t1, templ); /* from previous call. */ + + if (!(n2 = strchr(n1, '/')) || !(t2 = strchr(t1, '/'))) + return NO; + + *(n2++) = (char) 0; + *(t2++) = (char) 0; + + if ((0 == strcmp(t1, "*") || 0 == strcmp(t1, n1)) && + (0 == strcmp(t2, "*") || 0 == strcmp(t2, n2))) + return YES; + } + return NO; +} + +HTList *HTAtom_templateMatches(const char *templ) +{ + HTList *matches = HTList_new(); + + if (initialised && templ) { + int i; + HTAtom *cur; + + for (i = 0; i < HASH_SIZE; i++) { + for (cur = hash_table[i]; cur; cur = cur->next) { + if (mime_match(cur->name, templ)) + HTList_addObject(matches, (void *) cur); + } + } + } + return matches; +} diff --git a/WWW/Library/Implementation/HTAtom.h b/WWW/Library/Implementation/HTAtom.h new file mode 100644 index 00000000..4125d31b --- /dev/null +++ b/WWW/Library/Implementation/HTAtom.h @@ -0,0 +1,54 @@ +/* */ + +/* Atoms: Names to numbers HTAtom.h + * ======================= + * + * Atoms are names which are given representative pointer values + * so that they can be stored more efficiently, and compaisons + * for equality done more efficiently. + * + * HTAtom_for(string) returns a representative value such that it + * will always (within one run of the program) return the same + * value for the same given string. 
+ * + * Authors: + * TBL Tim Berners-Lee, WorldWideWeb project, CERN + * + * (c) Copyright CERN 1991 - See Copyright.html + * + */ + +#ifndef HTATOM_H +#define HTATOM_H + +#include <HTList.h> + +#ifdef __cplusplus +extern "C" { +#endif + typedef struct _HTAtom HTAtom; + + struct _HTAtom { + HTAtom *next; + char *name; + }; /* struct _HTAtom */ + + extern HTAtom *HTAtom_for(const char *string); + extern HTList *HTAtom_templateMatches(const char *templ); + +#define HTAtom_name(a) ((a)->name) + +/* + +The HTFormat type + + We use the HTAtom object for holding representations. This allows faster manipulation + (comparison and copying) that if we stayed with strings. + + */ + typedef HTAtom *HTFormat; + +#ifdef __cplusplus +} +#endif +#endif /* HTATOM_H */ diff --git a/WWW/Library/Implementation/HTBTree.c b/WWW/Library/Implementation/HTBTree.c new file mode 100644 index 00000000..3a76550e --- /dev/null +++ b/WWW/Library/Implementation/HTBTree.c @@ -0,0 +1,687 @@ +/* Binary Tree for sorting things + * ============================== + * Author: Arthur Secret + * + * 4 March 94: Bug fixed in the balancing procedure + * + */ + +#include <HTUtils.h> +#include <HTBTree.h> + +#define MAXIMUM(a,b) ((a)>(b)?(a):(b)) + +#include <LYLeaks.h> + +/********************************************************* + * This function returns an HTBTree with memory allocated + * for it when given a mean to compare things + */ +HTBTree *HTBTree_new(HTComparer comp) +{ + HTBTree *tree = typeMalloc(HTBTree); + + if (tree == NULL) + outofmem(__FILE__, "HTBTree_new"); + + assert(tree != NULL); + + tree->compare = comp; + tree->top = NULL; + + return tree; +} + +/********************************************************* + * This void will free the memory allocated for one element + */ +static void HTBTElement_free(HTBTElement *element) +{ + if (element) { + if (element->left != NULL) + HTBTElement_free(element->left); + if (element->right != NULL) + HTBTElement_free(element->right); + FREE(element); + 
} +} + +/************************************************************* + * This void will free the memory allocated for the whole tree + */ +void HTBTree_free(HTBTree *tree) +{ + HTBTElement_free(tree->top); + FREE(tree); +} + +/********************************************************* + * This void will free the memory allocated for one element + */ +static void HTBTElementAndObject_free(HTBTElement *element) +{ + if (element) { /* Just in case nothing was in the tree anyway */ + if (element->left != NULL) + HTBTElementAndObject_free(element->left); + if (element->right != NULL) + HTBTElementAndObject_free(element->right); + FREE(element->object); + FREE(element); + } +} + +/************************************************************* + * This void will free the memory allocated for the whole tree + */ +void HTBTreeAndObject_free(HTBTree *tree) +{ + HTBTElementAndObject_free(tree->top); + FREE(tree); +} + +/********************************************************************* + * Returns a pointer to equivalent object in a tree or NULL if none. + */ +void *HTBTree_search(HTBTree *tree, + void *object) +{ + HTBTElement *cur = tree->top; + int res; + + while (cur != NULL) { + res = tree->compare(object, cur->object); + + if (res == 0) + return cur->object; + else if (res < 0) + cur = cur->left; + else if (res > 0) + cur = cur->right; + } + return NULL; +} + +/********************************************************************* + * This void is the core of HTBTree.c . 
It will + * 1/ add a new element to the tree at the right place + * so that the tree remains sorted + * 2/ balance the tree to be as fast as possible when reading it + */ +void HTBTree_add(HTBTree *tree, + void *object) +{ + HTBTElement *father_of_element; + HTBTElement *added_element; + HTBTElement *forefather_of_element; + HTBTElement *father_of_forefather; + BOOL father_found, top_found; + int depth, depth2, corrections; + + /* father_of_element is a pointer to the structure that is the father of + * the new object "object". added_element is a pointer to the structure + * that contains or will contain the new object "object". + * father_of_forefather and forefather_of_element are pointers that are + * used to modify the depths of upper elements, when needed. + * + * father_found indicates by a value NO when the future father of "object" + * is found. top_found indicates by a value NO when, in case of a + * difference of depths < 2, the top of the tree is encountered and forbids + * any further try to balance the tree. corrections is an integer used to + * avoid infinite loops in cases such as: + * + * 3 3 + * 4 4 + * 5 5 + * + * 3 is used here to show that it need not be the top of the tree. 
+ */ + + /* + * 1/ Adding of the element to the binary tree + */ + + if (tree->top == NULL) { + tree->top = typeMalloc(HTBTElement); + + if (tree->top == NULL) + outofmem(__FILE__, "HTBTree_add"); + + assert(tree->top != NULL); + + tree->top->up = NULL; + tree->top->object = object; + tree->top->left = NULL; + tree->top->left_depth = 0; + tree->top->right = NULL; + tree->top->right_depth = 0; + } else { + father_found = YES; + father_of_element = tree->top; + added_element = NULL; + father_of_forefather = NULL; + forefather_of_element = NULL; + while (father_found) { + int res = tree->compare(object, father_of_element->object); + + if (res < 0) { + if (father_of_element->left != NULL) + father_of_element = father_of_element->left; + else { + father_found = NO; + father_of_element->left = typeMalloc(HTBTElement); + + if (father_of_element->left == NULL) + outofmem(__FILE__, "HTBTree_add"); + + assert(father_of_element->left != NULL); + + added_element = father_of_element->left; + added_element->up = father_of_element; + added_element->object = object; + added_element->left = NULL; + added_element->left_depth = 0; + added_element->right = NULL; + added_element->right_depth = 0; + } + } else { /* res >= 0 */ + if (father_of_element->right != NULL) { + father_of_element = father_of_element->right; + } else { + father_found = NO; + father_of_element->right = typeMalloc(HTBTElement); + + if (father_of_element->right == NULL) + outofmem(__FILE__, "HTBTree_add"); + assert(father_of_element->right != NULL); + + added_element = father_of_element->right; + added_element->up = father_of_element; + added_element->object = object; + added_element->left = NULL; + added_element->left_depth = 0; + added_element->right = NULL; + added_element->right_depth = 0; + } + } + } + + /* + * Changing of all depths that need to be changed + */ + father_of_forefather = father_of_element; + forefather_of_element = added_element; + do { + if (father_of_forefather->left == forefather_of_element) 
{ + depth = father_of_forefather->left_depth; + father_of_forefather->left_depth = 1 + + MAXIMUM(forefather_of_element->right_depth, + forefather_of_element->left_depth); + depth2 = father_of_forefather->left_depth; + } else { + depth = father_of_forefather->right_depth; + father_of_forefather->right_depth = 1 + + MAXIMUM(forefather_of_element->right_depth, + forefather_of_element->left_depth); + depth2 = father_of_forefather->right_depth; + } + forefather_of_element = father_of_forefather; + father_of_forefather = father_of_forefather->up; + } while ((depth != depth2) && (father_of_forefather != NULL)); + + /* + * 2/ Balancing the binary tree, if necessary + */ + top_found = YES; + corrections = 0; + while ((top_found) && (corrections < 7)) { + if ((abs(father_of_element->left_depth + - father_of_element->right_depth)) < 2) { + if (father_of_element->up != NULL) + father_of_element = father_of_element->up; + else + top_found = NO; + } else { /* We start the process of balancing */ + + corrections = corrections + 1; + /* + * corrections is an integer used to avoid infinite + * loops in cases such as: + * + * 3 3 + * 4 4 + * 5 5 + * + * 3 is used to show that it need not be the top of the tree + * But let's avoid these two exceptions anyhow + * with the two following conditions (4 March 94 - AS) + */ + + if (father_of_element->left == NULL) { + if ((father_of_element->right != NULL) + && (father_of_element->right->right == NULL) + && (father_of_element->right->left != NULL) + && (father_of_element->right->left->left == NULL) + && (father_of_element->right->left->right == NULL)) { + corrections = 7; + } + } else { + if ((father_of_element->right == NULL) + && (father_of_element->left->left == NULL) + && (father_of_element->left->right != NULL) + && (father_of_element->left->right->right == NULL) + && (father_of_element->left->right->left == NULL)) { + corrections = 7; + } + } + + if ((father_of_element->left != NULL) + && (father_of_element->left_depth > 
father_of_element->right_depth)) { + added_element = father_of_element->left; + father_of_element->left_depth = added_element->right_depth; + added_element->right_depth = 1 + + MAXIMUM(father_of_element->right_depth, + father_of_element->left_depth); + if (father_of_element->up != NULL) { + /* Bug fixed in March 94 - AS */ + BOOL first_time; + + father_of_forefather = father_of_element->up; + forefather_of_element = added_element; + first_time = YES; + do { + if (father_of_forefather->left + == forefather_of_element->up) { + depth = father_of_forefather->left_depth; + if (first_time) { + father_of_forefather->left_depth = 1 + + MAXIMUM(forefather_of_element->left_depth, + forefather_of_element->right_depth); + first_time = NO; + } else + father_of_forefather->left_depth = 1 + + MAXIMUM(forefather_of_element->up->left_depth, + forefather_of_element->up->right_depth); + + depth2 = father_of_forefather->left_depth; + } else { + depth = father_of_forefather->right_depth; + if (first_time) { + father_of_forefather->right_depth = 1 + + MAXIMUM(forefather_of_element->left_depth, + forefather_of_element->right_depth); + first_time = NO; + } else + father_of_forefather->right_depth = 1 + + MAXIMUM(forefather_of_element->up->left_depth, + forefather_of_element->up->right_depth); + depth2 = father_of_forefather->right_depth; + } + forefather_of_element = forefather_of_element->up; + father_of_forefather = father_of_forefather->up; + } while ((depth != depth2) && + (father_of_forefather != NULL)); + father_of_forefather = father_of_element->up; + if (father_of_forefather->left == father_of_element) { + /* + * 3 3 + * 4 5 + * When tree 5 6 becomes 7 4 + * 7 8 8 6 + * + * 3 is used to show that it may not be the top of the + * tree. 
+ */ + father_of_forefather->left = added_element; + father_of_element->left = added_element->right; + added_element->right = father_of_element; + } + if (father_of_forefather->right == father_of_element) { + /* + * 3 3 + * 4 5 + * When tree 5 6 becomes 7 4 + * 7 8 8 6 + * + * 3 is used to show that it may not be the top of the + * tree + */ + father_of_forefather->right = added_element; + father_of_element->left = added_element->right; + added_element->right = father_of_element; + } + added_element->up = father_of_forefather; + } else { + /* + + * 1 2 + * When tree 2 3 becomes 4 1 + * 4 5 5 3 + * + * 1 is used to show that it is the top of the tree + */ + added_element->up = NULL; + father_of_element->left = added_element->right; + added_element->right = father_of_element; + } + father_of_element->up = added_element; + if (father_of_element->left != NULL) + father_of_element->left->up = father_of_element; + } else if (father_of_element->right != NULL) { + added_element = father_of_element->right; + father_of_element->right_depth = added_element->left_depth; + added_element->left_depth = 1 + + MAXIMUM(father_of_element->right_depth, + father_of_element->left_depth); + if (father_of_element->up != NULL) + /* Bug fixed in March 94 - AS */ + { + BOOL first_time; + + father_of_forefather = father_of_element->up; + forefather_of_element = added_element; + first_time = YES; + do { + if (father_of_forefather->left + == forefather_of_element->up) { + depth = father_of_forefather->left_depth; + if (first_time) { + father_of_forefather->left_depth = 1 + + MAXIMUM(forefather_of_element->left_depth, + forefather_of_element->right_depth); + first_time = NO; + } else + father_of_forefather->left_depth = 1 + + MAXIMUM(forefather_of_element->up->left_depth, + forefather_of_element->up->right_depth); + depth2 = father_of_forefather->left_depth; + } else { + depth = father_of_forefather->right_depth; + if (first_time) { + father_of_forefather->right_depth = 1 + + 
MAXIMUM(forefather_of_element->left_depth, + forefather_of_element->right_depth); + first_time = NO; + } else + father_of_forefather->right_depth = 1 + + MAXIMUM(forefather_of_element->up->left_depth, + forefather_of_element->up->right_depth); + depth2 = father_of_forefather->right_depth; + } + father_of_forefather = father_of_forefather->up; + forefather_of_element = forefather_of_element->up; + } while ((depth != depth2) && + (father_of_forefather != NULL)); + father_of_forefather = father_of_element->up; + if (father_of_forefather->left == father_of_element) { + /* + * 3 3 + * 4 6 + * When tree 5 6 becomes 4 8 + * 7 8 5 7 + * + * 3 is used to show that it may not be the top of the + * tree. + */ + father_of_forefather->left = added_element; + father_of_element->right = added_element->left; + added_element->left = father_of_element; + } + if (father_of_forefather->right == father_of_element) { + /* + * 3 3 + * 4 6 + * When tree 5 6 becomes 4 8 + * 7 8 5 7 + * + * 3 is used to show that it may not be the top of the + * tree + */ + father_of_forefather->right = added_element; + father_of_element->right = added_element->left; + added_element->left = father_of_element; + } + added_element->up = father_of_forefather; + } else { + /* + + * 1 3 + * When tree 2 3 becomes 1 5 + * 4 5 2 4 + * + * 1 is used to show that it is the top of the tree. + */ + added_element->up = NULL; + father_of_element->right = added_element->left; + added_element->left = father_of_element; + } + father_of_element->up = added_element; + if (father_of_element->right != NULL) + father_of_element->right->up = father_of_element; + } + } + } + while (father_of_element->up != NULL) { + father_of_element = father_of_element->up; + } + tree->top = father_of_element; + } +} + +/************************************************************************* + * this function returns a pointer to the leftmost element if ele is NULL, + * and to the next object to the right otherwise. 
+ * If no elements left, returns a pointer to NULL. + */ +HTBTElement *HTBTree_next(HTBTree *tree, + HTBTElement *ele) +{ + HTBTElement *father_of_element; + HTBTElement *father_of_forefather; + + if (ele == NULL) { + father_of_element = tree->top; + if (father_of_element != NULL) + while (father_of_element->left != NULL) + father_of_element = father_of_element->left; + } else { + father_of_element = ele; + if (father_of_element->right != NULL) { + father_of_element = father_of_element->right; + while (father_of_element->left != NULL) + father_of_element = father_of_element->left; + } else { + father_of_forefather = father_of_element->up; + while (father_of_forefather && + (father_of_forefather->right == father_of_element)) { + father_of_element = father_of_forefather; + father_of_forefather = father_of_element->up; + } + father_of_element = father_of_forefather; + } + } +#ifdef BTREE_TRACE + /* The option -DBTREE_TRACE will give much more information + * about the way the process is running, for debugging matters + */ + if (father_of_element != NULL) { + printf("\nObject = %s\t", (char *) father_of_element->object); + if (father_of_element->up != NULL) + printf("Objet du pere = %s\n", + (char *) father_of_element->up->object); + else + printf("Pas de Pere\n"); + if (father_of_element->left != NULL) + printf("Objet du fils gauche = %s\t", + (char *) father_of_element->left->object); + else + printf("Pas de fils gauche\t"); + if (father_of_element->right != NULL) + printf("Objet du fils droit = %s\n", + (char *) father_of_element->right->object); + else + printf("Pas de fils droit\n"); + printf("Profondeur gauche = %d\t", father_of_element->left_depth); + printf("Profondeur droite = %d\n", father_of_element->right_depth); + printf(" **************\n"); + } +#endif + return father_of_element; +} + +#ifdef TEST +/***************************************************** + * This is just a test to show how to handle HTBTree.c + */ +main() +{ + HTBTree *tree; + HTBTElement 
*next_element; + + tree = HTBTree_new((HTComparer) strcasecomp); + HTBTree_add(tree, "hypertext"); + HTBTree_add(tree, "Addressing"); + HTBTree_add(tree, "X11"); + HTBTree_add(tree, "Tools"); + HTBTree_add(tree, "Proposal.wn"); + HTBTree_add(tree, "Protocols"); + HTBTree_add(tree, "NeXT"); + HTBTree_add(tree, "Daemon"); + HTBTree_add(tree, "Test"); + HTBTree_add(tree, "Administration"); + HTBTree_add(tree, "LineMode"); + HTBTree_add(tree, "DesignIssues"); + HTBTree_add(tree, "MarkUp"); + HTBTree_add(tree, "Macintosh"); + HTBTree_add(tree, "Proposal.rtf.wn"); + HTBTree_add(tree, "FIND"); + HTBTree_add(tree, "Paper"); + HTBTree_add(tree, "Tcl"); + HTBTree_add(tree, "Talks"); + HTBTree_add(tree, "Architecture"); + HTBTree_add(tree, "VMSHelp"); + HTBTree_add(tree, "Provider"); + HTBTree_add(tree, "Archive"); + HTBTree_add(tree, "SLAC"); + HTBTree_add(tree, "Project"); + HTBTree_add(tree, "News"); + HTBTree_add(tree, "Viola"); + HTBTree_add(tree, "Users"); + HTBTree_add(tree, "FAQ"); + HTBTree_add(tree, "WorkingNotes"); + HTBTree_add(tree, "Windows"); + HTBTree_add(tree, "FineWWW"); + HTBTree_add(tree, "Frame"); + HTBTree_add(tree, "XMosaic"); + HTBTree_add(tree, "People"); + HTBTree_add(tree, "All"); + HTBTree_add(tree, "Curses"); + HTBTree_add(tree, "Erwise"); + HTBTree_add(tree, "Carl"); + HTBTree_add(tree, "MidasWWW"); + HTBTree_add(tree, "XPM"); + HTBTree_add(tree, "MailRobot"); + HTBTree_add(tree, "Illustrations"); + HTBTree_add(tree, "VMClient"); + HTBTree_add(tree, "XPA"); + HTBTree_add(tree, "Clients.html"); + HTBTree_add(tree, "Library"); + HTBTree_add(tree, "CERNLIB_Distribution"); + HTBTree_add(tree, "libHTML"); + HTBTree_add(tree, "WindowsPC"); + HTBTree_add(tree, "tkWWW"); + HTBTree_add(tree, "tk2.3"); + HTBTree_add(tree, "CVS-RCS"); + HTBTree_add(tree, "DecnetSockets"); + HTBTree_add(tree, "SGMLStream"); + HTBTree_add(tree, "NextStep"); + HTBTree_add(tree, "CVSRepository_old"); + HTBTree_add(tree, "ArthurSecret"); + HTBTree_add(tree, "CVSROOT"); + 
HTBTree_add(tree, "HytelnetGate"); + HTBTree_add(tree, "cern.www.new.src"); + HTBTree_add(tree, "Conditions"); + HTBTree_add(tree, "HTMLGate"); + HTBTree_add(tree, "Makefile"); + HTBTree_add(tree, "Newsgroups.html"); + HTBTree_add(tree, "People.html"); + HTBTree_add(tree, "Bugs.html"); + HTBTree_add(tree, "Summary.html"); + HTBTree_add(tree, "zDesignIssues.wn"); + HTBTree_add(tree, "HT.draw"); + HTBTree_add(tree, "HTandCERN.wn"); + HTBTree_add(tree, "Ideas.wn"); + HTBTree_add(tree, "MarkUp.wn"); + HTBTree_add(tree, "Proposal.html"); + HTBTree_add(tree, "SearchPanel.draw"); + HTBTree_add(tree, "Comments.wn"); + HTBTree_add(tree, "Xanadu.html"); + HTBTree_add(tree, "Storinglinks.html"); + HTBTree_add(tree, "TheW3Book.html"); + HTBTree_add(tree, "Talk_Feb-91.html"); + HTBTree_add(tree, "JFosterEntry.txt"); + HTBTree_add(tree, "Summary.txt"); + HTBTree_add(tree, "Bibliography.html"); + HTBTree_add(tree, "HTandCern.txt"); + HTBTree_add(tree, "Talk.draw"); + HTBTree_add(tree, "zDesignNotes.html"); + HTBTree_add(tree, "Link.html"); + HTBTree_add(tree, "Status.html"); + HTBTree_add(tree, "http.txt"); + HTBTree_add(tree, "People.html~"); + HTBTree_add(tree, "TAGS"); + HTBTree_add(tree, "summary.txt"); + HTBTree_add(tree, "Technical.html"); + HTBTree_add(tree, "Terms.html"); + HTBTree_add(tree, "JANETAccess.html"); + HTBTree_add(tree, "People.txt"); + HTBTree_add(tree, "README.txt"); + HTBTree_add(tree, "CodingStandards.html"); + HTBTree_add(tree, "Copyright.txt"); + HTBTree_add(tree, "Status_old.html"); + HTBTree_add(tree, "patches~"); + HTBTree_add(tree, "RelatedProducts.html"); + HTBTree_add(tree, "Implementation"); + HTBTree_add(tree, "History.html"); + HTBTree_add(tree, "Makefile.bak"); + HTBTree_add(tree, "Makefile.old"); + HTBTree_add(tree, "Policy.html"); + HTBTree_add(tree, "WhatIs.html"); + HTBTree_add(tree, "TheProject.html"); + HTBTree_add(tree, "Notation.html"); + HTBTree_add(tree, "Helping.html"); + HTBTree_add(tree, "Cyber-WWW.sit.Hqx"); + HTBTree_add(tree, 
"Glossary.html"); + HTBTree_add(tree, "maketags.html"); + HTBTree_add(tree, "IntroCS.html"); + HTBTree_add(tree, "Contrib"); + HTBTree_add(tree, "Help.html"); + HTBTree_add(tree, "CodeManagExec"); + HTBTree_add(tree, "HT-0.1draz"); + HTBTree_add(tree, "Cello"); + HTBTree_add(tree, "TOPUB"); + HTBTree_add(tree, "BUILD"); + HTBTree_add(tree, "BUILDALL"); + HTBTree_add(tree, "Lynx"); + HTBTree_add(tree, "ArthurLibrary"); + HTBTree_add(tree, "RashtyClient"); + HTBTree_add(tree, "#History.html#"); + HTBTree_add(tree, "PerlServers"); + HTBTree_add(tree, "modules"); + HTBTree_add(tree, "NCSA_httpd"); + HTBTree_add(tree, "MAIL2HTML"); + HTBTree_add(tree, "core"); + HTBTree_add(tree, "EmacsWWW"); +#ifdef BTREE_TRACE + printf("\nTreeTopObject=%s\n\n", tree->top->object); +#endif + next_element = HTBTree_next(tree, NULL); + while (next_element != NULL) { +#ifndef BTREE_TRACE + printf("The next element is %s\n", next_element->object); +#endif + next_element = HTBTree_next(tree, next_element); + } + HTBTree_free(tree); +} + +#endif diff --git a/WWW/Library/Implementation/HTBTree.h b/WWW/Library/Implementation/HTBTree.h new file mode 100644 index 00000000..a4f78f94 --- /dev/null +++ b/WWW/Library/Implementation/HTBTree.h @@ -0,0 +1,104 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTBTree.html + BALANCED BINARY TREE FOR SORTING THINGS + + Tree creation, traversal and freeing. User-supplied comparison routine. + + Author: Arthur Secret, CERN. Public domain. 
Please mail bugs and changes to + www-request@info.cern.ch + + part of libWWW + + */ +#ifndef HTBTREE_H +#define HTBTREE_H 1 + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + +Data structures + + */ typedef struct _HTBTree_element { + void *object; /* User object */ + struct _HTBTree_element *up; + struct _HTBTree_element *left; + int left_depth; + struct _HTBTree_element *right; + int right_depth; + } HTBTElement; + + typedef int (*HTComparer) (void *a, void *b); + + typedef struct _HTBTree_top { + HTComparer compare; + struct _HTBTree_element *top; + } HTBTree; + +/* + +Create a binary tree given its discrimination routine + + */ + extern HTBTree *HTBTree_new(HTComparer comp); + +/* + +Free storage of the tree but not of the objects + + */ + extern void HTBTree_free(HTBTree *tree); + +/* + +Free storage of the tree and of the objects + + */ + extern void HTBTreeAndObject_free(HTBTree *tree); + +/* + +Add an object to a binary tree + + */ + + extern void HTBTree_add(HTBTree *tree, void *object); + +/* + +Search an object in a binary tree + + returns Pointer to equivalent object in a tree or NULL if none. + */ + + extern void *HTBTree_search(HTBTree *tree, void *object); + +/* + +Find user object for element + + */ +#define HTBTree_object(element) ((element)->object) + +/* + +Find next element in depth-first order + + ON ENTRY, + + ele if NULL, start with leftmost element. if != 0 give next object to + the right. + + returns Pointer to element or NULL if none left. 
+ + */ + extern HTBTElement *HTBTree_next(HTBTree *tree, HTBTElement *ele); + +#ifdef __cplusplus +} +#endif +#endif /* HTBTREE_H */ diff --git a/WWW/Library/Implementation/HTCJK.h b/WWW/Library/Implementation/HTCJK.h new file mode 100644 index 00000000..555cff1a --- /dev/null +++ b/WWW/Library/Implementation/HTCJK.h @@ -0,0 +1,112 @@ +/* + * $LynxId: HTCJK.h,v 1.18 2010/09/25 11:41:29 tom Exp $ + * + * CJK character converter HTCJK.h + * ======================= + * + * Added 11-Jun-96 by FM, based on jiscode.h for + * Yutaka Sato's (ysato@etl.go.jp) SJIS.c, and + * Takuya ASADA's (asada@three-a.co.jp) CJK patches. + * (see SGML.c). + * + */ + +#ifndef HTCJK_H +#define HTCJK_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + * STATUS CHANGE CODES + */ +#define TO_2BCODE '$' +#define TO_1BCODE '(' +#define TO_KANA '\016' +#define TO_KANAOUT '\017' +#define TO_KANJI "\033$B" +#define TO_HANJI "\033$A" +#define TO_HANGUL "\033$(C" +#define TO_ASCII "\033(B" +#define IS_SJIS_LO(lo) ((0x40<=lo)&&(lo!=0x7F)&&(lo<=0xFC)) +#define IS_SJIS_HI1(hi) ((0x81<=hi)&&(hi<=0x9F)) /* 1st lev. */ +#define IS_SJIS_HI2(hi) ((0xE0<=hi)&&(hi<=0xEF)) /* 2nd lev. 
*/ +#define IS_SJIS(hi,lo,in_sjis) (!IS_SJIS_LO(lo)?0:IS_SJIS_HI1(hi)?(in_sjis=1):in_sjis&&IS_SJIS_HI2(hi)) +#define IS_SJIS_2BYTE(hi,lo) (IS_SJIS_LO(lo)&&(IS_SJIS_HI1(hi)||IS_SJIS_HI2(hi))) +#define IS_SJIS_X0201KANA(lo) ((0xA1<=lo)&&(lo<=0xDF)) +#define IS_EUC_LOX(lo) ((0xA1<=lo)&&(lo<=0xFE)) /* extended */ +#define IS_EUC_HI(hi) ((0xA1<=hi)&&(hi<=0xFE)) +#define IS_EUC_X0201KANA(hi,lo) ((hi==0x8E)&&(0xA1<=lo)&&(lo<=0xDF)) +#define IS_EUC(hi,lo) ((IS_EUC_HI(hi) && IS_EUC_LOX(lo))||IS_EUC_X0201KANA(hi,lo)) +#define IS_JAPANESE_2BYTE(hi,lo) (IS_SJIS_2BYTE(hi,lo) || IS_EUC(hi,lo)) +#define IS_BIG5_LOS(lo) ((0x40<=lo)&&(lo<=0x7E)) /* standard */ +#define IS_BIG5_LOX(lo) ((0xA1<=lo)&&(lo<=0xFE)) /* extended */ +#define IS_BIG5_HI(hi) ((0xA1<=hi)&&(hi<=0xFE)) +#define IS_BIG5(hi,lo) (IS_BIG5_HI(hi) && (IS_BIG5_LOS(lo) || IS_BIG5_LOX(lo))) + typedef enum { + NOKANJI = 0, EUC, SJIS, JIS + } HTkcode; + typedef enum { + NOCJK = 0, JAPANESE, CHINESE, KOREAN, TAIPEI + } HTCJKlang; + + extern HTCJKlang HTCJK; + +/* + * Function prototypes. 
+ */ + extern void JISx0201TO0208_EUC(unsigned IHI, + unsigned ILO, + unsigned char *OHI, + unsigned char *OLO); + + extern unsigned char *SJIS_TO_JIS1(unsigned HI, + unsigned LO, + unsigned char *JCODE); + + extern unsigned char *JIS_TO_SJIS1(unsigned HI, + unsigned LO, + unsigned char *SJCODE); + + extern unsigned char *EUC_TO_SJIS1(unsigned HI, + unsigned LO, + register unsigned char *SJCODE); + + extern void JISx0201TO0208_SJIS(unsigned I, + unsigned char *OHI, + unsigned char *OLO); + + extern unsigned char *SJIS_TO_EUC1(unsigned HI, + unsigned LO, + unsigned char *EUCp); + + extern unsigned char *SJIS_TO_EUC(unsigned char *src, + unsigned char *dst); + + extern unsigned char *EUC_TO_SJIS(unsigned char *src, + unsigned char *dst); + + extern unsigned char *EUC_TO_JIS(unsigned char *src, + unsigned char *dst, + const char *toK, + const char *toA); + + extern unsigned char *TO_EUC(const unsigned char *jis, + unsigned char *euc); + + extern void TO_SJIS(const unsigned char *any, + unsigned char *sjis); + + extern void TO_JIS(const unsigned char *any, + unsigned char *jis); + + extern char *str_kcode(HTkcode code); + +#ifdef __cplusplus +} +#endif +#endif /* HTCJK_H */ diff --git a/WWW/Library/Implementation/HTChunk.c b/WWW/Library/Implementation/HTChunk.c new file mode 100644 index 00000000..2ea36c85 --- /dev/null +++ b/WWW/Library/Implementation/HTChunk.c @@ -0,0 +1,336 @@ +/* + * $LynxId: HTChunk.c,v 1.27 2012/02/07 11:28:44 tom Exp $ + * + * Chunk handling: Flexible arrays + * =============================== + * + */ + +#include <HTUtils.h> +#include <HTChunk.h> + +#include <LYLeaks.h> + +/* + * Initialize a chunk with a certain allocation unit + */ +void HTChunkInit(HTChunk *ch, int grow) +{ + ch->data = 0; + ch->growby = grow; + ch->size = 0; + ch->allocated = 0; +} + +/* Create a chunk with a certain allocation unit + * -------------- + */ +HTChunk *HTChunkCreate(int grow) +{ + HTChunk *ch = typecalloc(HTChunk); + + if (ch == NULL) + outofmem(__FILE__, 
"creation of chunk"); + + HTChunkInit(ch, grow); + return ch; +} + +HTChunk *HTChunkCreateMayFail(int grow, int failok) +{ + HTChunk *ch = typecalloc(HTChunk); + + if (ch == NULL) { + if (!failok) { + outofmem(__FILE__, "creation of chunk"); + } else { + return ch; + } + } + + assert(ch != NULL); + + HTChunkInit(ch, grow); + ch->failok = failok; + return ch; +} + +/* Create a chunk with a certain allocation unit and ensured size + * -------------- + */ +HTChunk *HTChunkCreate2(int grow, size_t needed) +{ + HTChunk *ch = typecalloc(HTChunk); + + if (ch == NULL) + outofmem(__FILE__, "HTChunkCreate2"); + + assert(ch != NULL); + + HTChunkInit(ch, grow); + if (needed-- > 0) { + /* Round up */ + ch->allocated = (int) (needed - (needed % (size_t) ch->growby) + + (unsigned) ch->growby); + CTRACE((tfp, "HTChunkCreate2: requested %d, allocate %u\n", + (int) needed, (unsigned) ch->allocated)); + ch->data = typecallocn(char, (unsigned) ch->allocated); + + if (!ch->data) + outofmem(__FILE__, "HTChunkCreate2 data"); + } + return ch; +} + +/* Clear a chunk of all data + * -------------------------- + */ +void HTChunkClear(HTChunk *ch) +{ + FREE(ch->data); + ch->size = 0; + ch->allocated = 0; +} + +/* Free a chunk (and it's chain, if any) + * ------------------------------------- + */ +void HTChunkFree(HTChunk *ch) +{ + HTChunk *next; + + do { + next = ch->next; + FREE(ch->data); + FREE(ch); + ch = next; + } while (ch != NULL); +} + +/* Realloc the chunk + * ----------------- + */ +BOOL HTChunkRealloc(HTChunk *ch, int growby) +{ + char *data; + + ch->allocated = ch->allocated + growby; + + data = (ch->data + ? 
typeRealloc(char, ch->data, ch->allocated) + : typecallocn(char, ch->allocated)); + + if (data) { + ch->data = data; + } else if (ch->failok) { + HTChunkClear(ch); /* allocation failed, clear all data - kw */ + return FALSE; /* caller should check ch->allocated - kw */ + } else { + outofmem(__FILE__, "HTChunkRealloc"); + } + return TRUE; +} + +/* Append a character + * ------------------ + */ +void HTChunkPutc(HTChunk *ch, unsigned c) +{ + if (ch->size >= ch->allocated) { + if (!HTChunkRealloc(ch, ch->growby)) + return; + } + ch->data[ch->size++] = (char) c; +} + +/* like above but no realloc: extend to another chunk if necessary */ +HTChunk *HTChunkPutc2(HTChunk *ch, int c) +{ + if (ch->size >= ch->allocated) { + HTChunk *chunk = HTChunkCreateMayFail(ch->growby, ch->failok); + + ch->next = chunk; + ch = chunk; + HTChunkPutc(ch, UCH(c)); + } else { + ch->data[ch->size++] = (char) c; + } + return ch; +} + +/* Ensure a certain size + * --------------------- + */ +void HTChunkEnsure(HTChunk *ch, int needed) +{ + if (needed <= ch->allocated) + return; + ch->allocated = needed - 1 - ((needed - 1) % ch->growby) + + ch->growby; /* Round up */ + ch->data = (ch->data + ? typeRealloc(char, ch->data, ch->allocated) + : typecallocn(char, ch->allocated)); + + if (ch->data == NULL) + outofmem(__FILE__, "HTChunkEnsure"); +} + +/* + * Append a block of characters. 
+ */ +void HTChunkPutb(HTChunk *ch, const char *b, int l) +{ + if (l <= 0) + return; + if (ch->size + l > ch->allocated) { + int growby = l - (l % ch->growby) + ch->growby; /* Round up */ + + if (!HTChunkRealloc(ch, growby)) + return; + } + MemCpy(ch->data + ch->size, b, l); + ch->size += l; +} + +/* like above but no realloc: extend to another chunk if necessary */ +HTChunk *HTChunkPutb2(HTChunk *ch, const char *b, int l) +{ + if (l <= 0) + return ch; + if (ch->size + l > ch->allocated) { + HTChunk *chunk; + int m = ch->allocated - ch->size; + + MemCpy(ch->data + ch->size, b, (unsigned) m); + ch->size += m; + + chunk = HTChunkCreateMayFail(ch->growby, ch->failok); + ch->next = chunk; + ch = chunk; + HTChunkPutb(ch, b + m, l - m); + } else { + MemCpy(ch->data + ch->size, b, (unsigned) l); + ch->size += l; + } + return ch; +} + +#define PUTC(code) ch->data[ch->size++] = (char)(code) +#define PUTC2(code) ch->data[ch->size++] = (char)(0x80|(0x3f &(code))) + +/* + * Append a character encoded as UTF-8. + */ +void HTChunkPutUtf8Char(HTChunk *ch, UCode_t code) +{ + int utflen; + + if (TOASCII(code) < 128) + utflen = 1; + else if (code < 0x800L) { + utflen = 2; + } else if (code < 0x10000L) { + utflen = 3; + } else if (code < 0x200000L) { + utflen = 4; + } else if (code < 0x4000000L) { + utflen = 5; + } else if (code <= 0x7fffffffL) { + utflen = 6; + } else + utflen = 0; + + if (ch->size + utflen > ch->allocated) { + int growby = (ch->growby >= utflen) ? 
ch->growby : utflen; + + if (!HTChunkRealloc(ch, growby)) + return; + } + + switch (utflen) { + case 0: + return; + case 1: + ch->data[ch->size++] = (char) code; + return; + case 2: + PUTC(0xc0 | (code >> 6)); + break; + case 3: + PUTC(0xe0 | (code >> 12)); + break; + case 4: + PUTC(0xf0 | (code >> 18)); + break; + case 5: + PUTC(0xf8 | (code >> 24)); + break; + case 6: + PUTC(0xfc | (code >> 30)); + break; + } + switch (utflen) { + case 6: + PUTC2(code >> 24); + /* FALLTHRU */ + case 5: + PUTC2(code >> 18); + /* FALLTHRU */ + case 4: + PUTC2(code >> 12); + /* FALLTHRU */ + case 3: + PUTC2(code >> 6); + /* FALLTHRU */ + case 2: + PUTC2(code); + break; + } +} + +/* Terminate a chunk + * ----------------- + */ +void HTChunkTerminate(HTChunk *ch) +{ + HTChunkPutc(ch, (char) 0); +} + +/* Append a string + * --------------- + */ +void HTChunkPuts(HTChunk *ch, const char *s) +{ + const char *p; + + if (s != NULL) { + for (p = s; *p; p++) { + if (ch->size >= ch->allocated) { + if (!HTChunkRealloc(ch, ch->growby)) + break; + } + ch->data[ch->size++] = *p; + } + } +} + +/* like above but no realloc: extend to another chunk if necessary */ +HTChunk *HTChunkPuts2(HTChunk *ch, const char *s) +{ + const char *p; + + if (s != NULL) { + for (p = s; *p; p++) { + if (ch->size >= ch->allocated) { + HTChunk *chunk = HTChunkCreateMayFail(ch->growby, ch->failok); + + ch->next = chunk; + ch = chunk; + HTChunkPuts(ch, p); + break; + } + ch->data[ch->size++] = *p; + } + } + return ch; +} diff --git a/WWW/Library/Implementation/HTChunk.h b/WWW/Library/Implementation/HTChunk.h new file mode 100644 index 00000000..1a9dbbae --- /dev/null +++ b/WWW/Library/Implementation/HTChunk.h @@ -0,0 +1,228 @@ +/* + * $LynxId: HTChunk.h,v 1.20 2010/09/24 08:37:39 tom Exp $ + * + * HTChunk: Flexible array handling for libwww + * CHUNK HANDLING: + * FLEXIBLE ARRAYS + * + * This module implements a flexible array. It is a general utility module. A + * chunk is a structure which may be extended. 
These routines create and + * append data to chunks, automatically reallocating them as necessary. + * + */ +#ifndef HTCHUNK_H +#define HTCHUNK_H 1 + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#include <UCMap.h> + +#ifdef __cplusplus +extern "C" { +#endif + typedef struct _HTChunk HTChunk; + + struct _HTChunk { + int size; /* In bytes */ + int growby; /* Allocation unit in bytes */ + int allocated; /* Current size of *data */ + char *data; /* Pointer to malloc'd area or 0 */ + int failok; /* allowed to fail without exiting program? */ + HTChunk *next; /* pointer to the next chunk */ + }; + +/* + * Initialize a chunk's allocation data and allocation-increment. + */ + extern void HTChunkInit(HTChunk *ch, int grow); + +/* + * + * Create new chunk + * + * ON ENTRY, + * + * growby The number of bytes to allocate at a time when the chunk + * is later extended. Arbitrary but normally a trade-off + * of time vs memory. + * + * ON EXIT, + * + * returns A chunk pointer to the new chunk, + * + */ + + extern HTChunk *HTChunkCreate(int growby); + +/* + * Create a chunk for which an allocation error is not a fatal application + * error if failok != 0, but merely resets the chunk. When using a chunk + * created this way, the caller should always check whether the contents + * are ok each time after data have been appended. + * The create call may also fail and will reurn NULL in that case. - kw + */ + extern HTChunk *HTChunkCreateMayFail(int growby, int failok); + +/* + * Like HTChunkCreate but with initial allocation - kw + * + */ + extern HTChunk *HTChunkCreate2(int growby, size_t needed); + +/* + * + * Free a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * ON EXIT, + * + * ch is invalid and may not be used. + * + */ + + extern void HTChunkFree(HTChunk *ch); + +/* + * + * Clear a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * ON EXIT, + * + * *ch The size of the chunk is zero. 
+ * + */ + + extern void HTChunkClear(HTChunk *ch); + +/* + * + * Realloc a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * growby growby + * + * ON EXIT, + * + * *ch Expanded by growby + * + */ + + extern BOOL HTChunkRealloc(HTChunk *ch, int growby); + +/* + * + * Ensure a chunk has a certain space in + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * s The size required + * + * ON EXIT, + * + * *ch Has size at least s + * + */ + + extern void HTChunkEnsure(HTChunk *ch, int s); + +/* + * + * Append a character to a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * c The character to be appended + * + * ON EXIT, + * + * *ch Is one character bigger + * + */ + extern void HTChunkPutc(HTChunk *ch, unsigned c); + + extern void HTChunkPutb(HTChunk *ch, const char *b, int l); + + extern void HTChunkPutUtf8Char(HTChunk *ch, UCode_t code); + +/* + * Append a string to a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * str Points to a zero-terminated string to be appended + * + * ON EXIT, + * + * *ch Is bigger by strlen(str) + * + */ + + extern void HTChunkPuts(HTChunk *ch, const char *str); + +/* + * + * Append a zero character to a chunk + * + */ + +/* + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * ON EXIT, + * + * *ch Is one character bigger + * + */ + + extern void HTChunkTerminate(HTChunk *ch); + +/* like the above but no realloc: extend to another chunk if necessary */ +/* + * + * Append a character (string, data) to a chunk + * + * ON ENTRY, + * + * ch A valid chunk pointer made by HTChunkCreate() + * + * c The character to be appended + * + * ON EXIT, + * + * returns original chunk or a pointer to the new chunk + * (original chunk is referenced to the new one + * by the field 'next') + * + */ + extern HTChunk *HTChunkPutc2(HTChunk *ch, int c); + extern HTChunk *HTChunkPuts2(HTChunk
*ch, const char *str); + extern HTChunk *HTChunkPutb2(HTChunk *ch, const char *b, int l); + +/* New pool infrastructure: UNlike the above, store data using alignment */ + extern HTChunk *HTChunkPutb0(HTChunk *ch, const char *b, int l); + +#ifdef __cplusplus +} +#endif +#endif /* HTCHUNK_H */ diff --git a/WWW/Library/Implementation/HTDOS.c b/WWW/Library/Implementation/HTDOS.c new file mode 100644 index 00000000..582a7626 --- /dev/null +++ b/WWW/Library/Implementation/HTDOS.c @@ -0,0 +1,241 @@ +/* + * $LynxId: HTDOS.c,v 1.39 2011/06/04 18:45:09 tom Exp $ + * DOS specific routines + */ + +#include <HTUtils.h> +#include <LYUtils.h> +#include <HTDOS.h> +#include <LYStrings.h> + +#include <LYLeaks.h> + +#ifdef _WINDOWS +#include <LYGlobalDefs.h> +#include <HTAlert.h> +#endif + +/* + * Make a copy of the source argument in the result, allowing some extra + * space so we can append directly onto the result without reallocating. + */ +static char *copy_plus(char **result, const char *source) +{ + int length = (int) strlen(source); + int extra = 10; + int n; + + for (n = 0; n < length; ++n) { + if (source[n] == ' ') + ++extra; + } + + HTSprintf0(result, "%-*s", length + extra, source); + (*result)[length] = 0; + return (*result); +} + +/* PUBLIC HTDOS_wwwName() + * CONVERTS DOS Name into WWW Name + * ON ENTRY: + * dosname DOS file specification (NO NODE) + * + * ON EXIT: + * returns WWW file specification + * + */ +const char *HTDOS_wwwName(const char *dosname) +{ + static char *wwwname = NULL; + char *cp_url = copy_plus(&wwwname, dosname); + int wwwname_len; + char ch; + + while ((ch = *dosname) != '\0') { + switch (ch) { + case '\\': + /* convert dos backslash to unix-style */ + *cp_url++ = '/'; + break; + case ' ': + *cp_url++ = '%'; + *cp_url++ = '2'; + *cp_url++ = '0'; + break; + default: + *cp_url++ = ch; + break; + } + dosname++; + } + *cp_url = '\0'; + + wwwname_len = (int) strlen(wwwname); + if (wwwname_len > 1) + cp_url--; /* point last char */ + + if (wwwname_len 
> 3 && *cp_url == '/') { + cp_url++; + *cp_url = '\0'; + } + return (wwwname); +} + +/* + * Convert slashes from Unix to DOS + */ +char *HTDOS_slashes(char *path) +{ + char *s; + + for (s = path; *s != '\0'; ++s) { + if (*s == '/') { + *s = '\\'; + } + } + return path; +} + +/* PUBLIC HTDOS_name() + * CONVERTS WWW name into a DOS name + * ON ENTRY: + * wwwname WWW file name + * + * ON EXIT: + * returns DOS file specification + */ +char *HTDOS_name(const char *wwwname) +{ + static char *result = NULL; + int joe; + +#if defined(SH_EX) /* 2000/03/07 (Tue) 18:32:42 */ + if (unsafe_filename(wwwname)) { + HTUserMsg2("unsafe filename : %s", wwwname); + copy_plus(&result, "BAD_LOCAL_FILE_NAME"); + } else { + copy_plus(&result, wwwname); + } +#else + copy_plus(&result, wwwname); +#endif +#ifdef __DJGPP__ + if (result[0] == '/' + && result[1] == 'd' + && result[2] == 'e' + && result[3] == 'v' + && result[4] == '/' + && isalpha(result[5])) { + return (result); + } +#endif /* __DJGPP__ */ + + (void) HTDOS_slashes(result); + + /* pesky leading slash, rudiment from file://localhost/ */ + /* the rest of path may be with or without drive letter */ + if ((result[1] != '\\') && (result[0] == '\\')) { + for (joe = 0; (result[joe] = result[joe + 1]) != 0; joe++) ; + } + /* convert '|' after the drive letter to ':' */ + if (isalpha(UCH(result[0])) && result[1] == '|') { + result[1] = ':'; + } +#ifdef _WINDOWS /* 1998/04/02 (Thu) 08:59:48 */ + if (LYLastPathSep(result) != NULL + && !LYIsDosDrive(result)) { + char temp_buff[LY_MAXPATH]; + + sprintf(temp_buff, "%.3s\\%.*s", windows_drive, + (int) (sizeof(temp_buff) - 5), result); + StrAllocCopy(result, temp_buff); + } +#endif + /* + * If we have only a device, add a trailing slash. Otherwise it just + * refers to the current directory on the given device. 
+ */ + if (LYLastPathSep(result) == NULL + && LYIsDosDrive(result)) + LYAddPathSep0(result); + + CTRACE((tfp, "HTDOS_name changed `%s' to `%s'\n", wwwname, result)); + return (result); +} + +#ifdef WIN_EX +char *HTDOS_short_name(const char *path) +{ + static char sbuf[LY_MAXPATH]; + char *ret; + DWORD r; + + if (strchr(path, '/')) + path = HTDOS_name(path); + r = GetShortPathName(path, sbuf, sizeof sbuf); + if (r >= sizeof(sbuf) || r == 0) { + ret = LYStrNCpy(sbuf, path, sizeof(sbuf)); + } else { + ret = sbuf; + } + return ret; +} +#endif + +#if defined(DJGPP) +/* + * Poll tcp/ip lib and yield to DPMI-host while nothing in + * keyboard buffer (head = tail) (simpler than kbhit). + * This is required to be able to finish off dead sockets, + * answer pings etc. + */ +#include <pc.h> +#include <dpmi.h> +#include <libc/farptrgs.h> +#include <go32.h> + +void djgpp_idle_loop(void) +{ + while (_farpeekw(_dos_ds, 0x41a) == _farpeekw(_dos_ds, 0x41c)) { + tcp_tick(NULL); + __dpmi_yield(); +#if defined(USE_SLANG) + if (SLang_input_pending(1)) + break; +#endif + } +} + +/* PUBLIC getxkey() + * Replaces libc's getxkey() with polling of tcp/ip + * library (WatTcp or Watt-32). * + * ON EXIT: + * returns extended keypress. 
+ */ + +/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ + +int getxkey(void) +{ +#if defined(DJGPP_KEYHANDLER) + __dpmi_regs r; + + djgpp_idle_loop(); + + r.h.ah = 0x10; + __dpmi_int(0x16, &r); + + if (r.h.al == 0x00) + return 0x0100 | r.h.ah; + if (r.h.al == 0xe0) + return 0x0200 | r.h.ah; + return r.h.al; + +#elif defined(USE_SLANG) + djgpp_idle_loop(); + return SLkp_getkey(); +#else + /* PDcurses uses myGetChar() in LYString.c */ /* NOTE(review): this branch has no return statement although getxkey() returns int -- confirm the function is never called in this configuration, or give it a defined result */ +#endif +} +#endif /* DJGPP */ diff --git a/WWW/Library/Implementation/HTDOS.h b/WWW/Library/Implementation/HTDOS.h new file mode 100644 index 00000000..e1613cb0 --- /dev/null +++ b/WWW/Library/Implementation/HTDOS.h @@ -0,0 +1,56 @@ +/* + * $LynxId: HTDOS.h,v 1.14 2009/09/09 00:16:06 tom Exp $ + * + * DOS specific routines + */ + +#ifndef HTDOS_H +#define HTDOS_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif /* HTUTILS_H */ + +/* PUBLIC HTDOS_wwwName() + * CONVERTS DOS Name into WWW Name + * ON ENTRY: + * dosname DOS file specification (NO NODE) + * + * ON EXIT: + * returns WWW file specification + * + */ +const char *HTDOS_wwwName(const char *dosname); + +/* + * Converts Unix slashes to DOS + */ +char *HTDOS_slashes(char *path); + +/* PUBLIC HTDOS_name() + * CONVERTS WWW name into a DOS name + * ON ENTRY: + * wwwname WWW file name + * + * ON EXIT: + * returns DOS file specification + * + * Bug: Returns pointer to static -- non-reentrant + */ +char *HTDOS_name(const char *wwwname); + +#ifdef WIN_EX +char *HTDOS_short_name(const char *fn); + +#else +#define HTDOS_short_name(fn) fn +#endif + +#ifdef DJGPP +/* + * Poll tcp/ip lib and yield to DPMI-host while nothing in + * keyboard buffer (head = tail) (simpler than kbhit).
+ */ +void djgpp_idle_loop(void); +#endif +#endif /* HTDOS_H */ diff --git a/WWW/Library/Implementation/HTFTP.c b/WWW/Library/Implementation/HTFTP.c new file mode 100644 index 00000000..480eacfd --- /dev/null +++ b/WWW/Library/Implementation/HTFTP.c @@ -0,0 +1,4224 @@ +/* + * $LynxId: HTFTP.c,v 1.101 2012/02/09 12:34:48 tom Exp $ + * + * File Transfer Protocol (FTP) Client + * for a WorldWideWeb browser + * =================================== + * + * A cache of control connections is kept. + * + * Note: Port allocation + * + * It is essential that the port is allocated by the system, rather + * than chosen in rotation by us (POLL_PORTS), or the following + * problem occurs. + * + * It seems that an attempt by the server to connect to a port which has + * been used recently by a listen on the same socket, or by another + * socket this or another process causes a hangup of (almost exactly) + * one minute. Therefore, we have to use a rotating port number. + * The problem remains that if the application is run twice in quick + * succession, it will hang for what remains of a minute. + * + * Authors + * TBL Tim Berners-lee <timbl@info.cern.ch> + * DD Denis DeLaRoca 310 825-4580 <CSP1DWD@mvs.oac.ucla.edu> + * LM Lou Montulli <montulli@ukanaix.cc.ukans.edu> + * FM Foteos Macrides <macrides@sci.wfeb.edu> + * History: + * 2 May 91 Written TBL, as a part of the WorldWideWeb project. + * 15 Jan 92 Bug fix: close() was used for NETCLOSE for control soc + * 10 Feb 92 Retry if cached connection times out or breaks + * 8 Dec 92 Bug fix 921208 TBL after DD + * 17 Dec 92 Anon FTP password now just WWWuser@ suggested by DD + * fails on princeton.edu! + * 27 Dec 93 (FM) Fixed up so FTP now works with VMS hosts. Path + * must be Unix-style and cannot include the device + * or top directory. + * ?? ??? ?? 
(LM) Added code to prompt and send passwords for non + * anonymous FTP + * 25 Mar 94 (LM) Added code to recognize different ftp server types + * and code to parse dates and sizes on most hosts. + * 27 Mar 93 (FM) Added code for getting dates and sizes on VMS hosts. + * + * Notes: + * Portions Copyright 1994 Trustees of Dartmouth College + * Code for recognizing different FTP servers and + * parsing "ls -l" output taken from Macintosh Fetch + * program with permission from Jim Matthews, + * Dartmouth Software Development Team. + */ + +/* + * BUGS: @@@ Limit connection cache size! + * Error reporting to user. + * 400 & 500 errors are ack'ed by user with windows. + * Use configuration file for user names + * + * Note for portability this version does not use select() and + * so does not watch the control and data channels at the + * same time. + */ + +#include <HTUtils.h> + +#include <HTAlert.h> + +#include <HTFTP.h> /* Implemented here */ +#include <HTTCP.h> +#include <HTTP.h> +#include <HTFont.h> + +#define REPEAT_PORT /* Give the port number for each file */ +#define REPEAT_LISTEN /* Close each listen socket and open a new one */ + +/* define POLL_PORTS If allocation does not work, poll ourselves.*/ +#define LISTEN_BACKLOG 2 /* Number of pending connect requests (TCP) */ + +#define FIRST_TCP_PORT 1024 /* Region to try for a listening port */ +#define LAST_TCP_PORT 5999 + +#define LINE_LENGTH 256 + +#include <HTParse.h> +#include <HTAnchor.h> +#include <HTFile.h> /* For HTFileFormat() */ +#include <HTBTree.h> +#include <HTChunk.h> +#ifndef IPPORT_FTP +#define IPPORT_FTP 21 +#endif /* !IPORT_FTP */ + +#include <LYUtils.h> +#include <LYGlobalDefs.h> +#include <LYStrings.h> +#include <LYLeaks.h> + +typedef struct _connection { + struct _connection *next; /* Link on list */ + unsigned long addr; /* IP address */ + int socket; /* Socket number for communication */ + BOOL binary; /* Binary mode? 
*/ +} connection; + +/* Hypertext object building machinery +*/ +#include <HTML.h> + +/* + * socklen_t is the standard, but there are many pre-standard variants. + * This ifdef works around a few of those cases. + * + * Information was obtained from header files on these platforms: + * AIX 4.3.2, 5.1 + * HPUX 10.20, 11.00, 11.11 + * IRIX64 6.5 + * Tru64 4.0G, 4.0D, 5.1 + */ +#if defined(SYS_IRIX64) + /* IRIX64 6.5 socket.h may use socklen_t if SGI_SOURCE is not defined */ +# if _NO_XOPEN4 && _NO_XOPEN5 +# define LY_SOCKLEN socklen_t +# elif _ABIAPI +# define LY_SOCKLEN int +# elif _XOPEN5 +# if (_MIPS_SIM != _ABIO32) +# define LY_SOCKLEN socklen_t +# else +# define LY_SOCKLEN int +# endif +# else +# define LY_SOCKLEN size_t +# endif +#elif defined(SYS_HPUX) +# if defined(_XOPEN_SOURCE_EXTENDED) && defined(SO_PROTOTYPE) +# define LY_SOCKLEN socklen_t +# else /* HPUX 10.20, etc. */ +# define LY_SOCKLEN int +# endif +#elif defined(SYS_TRU64) +# if defined(_POSIX_PII_SOCKET) +# define LY_SOCKLEN socklen_t +# elif defined(_XOPEN_SOURCE_EXTENDED) +# define LY_SOCKLEN size_t +# else +# define LY_SOCKLEN int +# endif +#else +# define LY_SOCKLEN socklen_t +#endif + +#define PUTC(c) (*target->isa->put_character) (target, c) +#define PUTS(s) (*target->isa->put_string) (target, s) +#define START(e) (*target->isa->start_element) (target, e, 0, 0, -1, 0) +#define END(e) (*target->isa->end_element) (target, e, 0) +#define FREE_TARGET (*target->isa->_free) (target) +#define ABORT_TARGET (*target->isa->_free) (target) + +struct _HTStructured { + const HTStructuredClass *isa; + /* ... 
*/ +}; + +/* Global Variables + * --------------------- + */ +int HTfileSortMethod = FILE_BY_NAME; + +#ifndef DISABLE_FTP /*This disables everything to end-of-file */ +static char ThisYear[8]; +static char LastYear[8]; +static int TheDate; +static BOOLEAN HaveYears = FALSE; + +/* Module-Wide Variables + * --------------------- + */ +static connection *connections = NULL; /* Linked list of connections */ +static char response_text[LINE_LENGTH + 1]; /* Last response from ftp host */ +static connection *control = NULL; /* Current connection */ +static int data_soc = -1; /* Socket for data transfer =invalid */ +static char *user_entered_password = NULL; +static char *last_username_and_host = NULL; + +/* + * Some ftp servers are known to have a broken implementation of RETR. If + * asked to retrieve a directory, they get confused and fail subsequent + * commands such as CWD and LIST. + */ +static int Broken_RETR = FALSE; + +/* + * Some ftp servers are known to have a broken implementation of EPSV. The + * server will hang for a long time when we attempt to connect after issuing + * this command. + */ +#ifdef INET6 +static int Broken_EPSV = FALSE; +#endif + +typedef enum { + GENERIC_SERVER + ,MACHTEN_SERVER + ,UNIX_SERVER + ,VMS_SERVER + ,CMS_SERVER + ,DCTS_SERVER + ,TCPC_SERVER + ,PETER_LEWIS_SERVER + ,NCSA_SERVER + ,WINDOWS_NT_SERVER + ,WINDOWS_2K_SERVER + ,MS_WINDOWS_SERVER + ,MSDOS_SERVER + ,APPLESHARE_SERVER + ,NETPRESENZ_SERVER + ,DLS_SERVER +} eServerType; + +static eServerType server_type = GENERIC_SERVER; /* the type of ftp host */ +static int unsure_type = FALSE; /* sure about the type? */ +static BOOLEAN use_list = FALSE; /* use the LIST command? 
*/ + +static int interrupted_in_next_data_char = FALSE; + +#ifdef POLL_PORTS +static PortNumber port_number = FIRST_TCP_PORT; +#endif /* POLL_PORTS */ + +static BOOL have_socket = FALSE; /* true if master_socket is valid */ +static unsigned master_socket; /* Listening socket = invalid */ + +static char port_command[255]; /* Command for setting the port */ +static fd_set open_sockets; /* Mask of active channels */ +static unsigned num_sockets; /* Number of sockets to scan */ +static PortNumber passive_port; /* Port server specified for data */ + +#define NEXT_CHAR HTGetCharacter() /* Use function in HTFormat.c */ + +#define DATA_BUFFER_SIZE 2048 +static char data_buffer[DATA_BUFFER_SIZE]; /* Input data buffer */ +static char *data_read_pointer; +static char *data_write_pointer; + +#define NEXT_DATA_CHAR next_data_char() +static int close_connection(connection * con); + +#ifdef LY_FIND_LEAKS +/* + * This function frees module globals. - FM + */ +static void free_FTPGlobals(void) +{ + FREE(user_entered_password); + FREE(last_username_and_host); + if (control) { + if (control->socket != -1) + close_connection(control); + FREE(control); + } +} +#endif /* LY_FIND_LEAKS */ + +/* PUBLIC HTVMS_name() + * CONVERTS WWW name into a VMS name + * ON ENTRY: + * nn Node Name (optional) + * fn WWW file name + * + * ON EXIT: + * returns vms file specification + * + * Bug: Returns pointer to static -- non-reentrant + */ +char *HTVMS_name(const char *nn, + const char *fn) +{ + /* We try converting the filename into Files-11 syntax. That is, we assume + * first that the file is, like us, on a VMS node. We try remote (or + * local) DECnet access. Files-11, VMS, VAX and DECnet are trademarks of + * Digital Equipment Corporation. The node is assumed to be local if the + * hostname WITHOUT DOMAIN matches the local one. 
@@@ + */ + static char *vmsname; + char *filename = (char *) malloc(strlen(fn) + 1); + char *nodename = (char *) malloc(strlen(nn) + 2 + 1); /* Copies to hack */ + char *second; /* 2nd slash */ + char *last; /* last slash */ + + const char *hostname = HTHostName(); + + if (!filename || !nodename) + outofmem(__FILE__, "HTVMSname"); + + assert(filename != NULL); + assert(nodename != NULL); + + strcpy(filename, fn); + strcpy(nodename, ""); /* On same node? Yes if node names match */ + if (StrNCmp(nn, "localhost", 9)) { + const char *p; + const char *q; + + for (p = hostname, q = nn; + *p && *p != '.' && *q && *q != '.'; p++, q++) { + if (TOUPPER(*p) != TOUPPER(*q)) { + char *r; + + strcpy(nodename, nn); + r = strchr(nodename, '.'); /* Mismatch */ + if (r) + *r = '\0'; /* Chop domain */ + strcat(nodename, "::"); /* Try decnet anyway */ + break; + } + } + } + + second = strchr(filename + 1, '/'); /* 2nd slash */ + last = strrchr(filename, '/'); /* last slash */ + + if (!second) { /* Only one slash */ + HTSprintf0(&vmsname, "%s%s", nodename, filename + 1); + } else if (second == last) { /* Exactly two slashes */ + *second = '\0'; /* Split filename from disk */ + HTSprintf0(&vmsname, "%s%s:%s", nodename, filename + 1, second + 1); + *second = '/'; /* restore */ + } else { /* More than two slashes */ + char *p; + + *second = '\0'; /* Split disk from directories */ + *last = '\0'; /* Split dir from filename */ + HTSprintf0(&vmsname, "%s%s:[%s]%s", + nodename, filename + 1, second + 1, last + 1); + *second = *last = '/'; /* restore filename */ + for (p = strchr(vmsname, '['); *p != ']'; p++) + if (*p == '/') + *p = '.'; /* Convert dir sep. 
to dots */ + } + FREE(nodename); + FREE(filename); + return vmsname; +} + +/* Procedure: Read a character from the data connection + * ---------------------------------------------------- + */ +static int next_data_char(void) +{ + int status; + + if (data_read_pointer >= data_write_pointer) { + status = NETREAD(data_soc, data_buffer, DATA_BUFFER_SIZE); + if (status == HT_INTERRUPTED) + interrupted_in_next_data_char = 1; + if (status <= 0) + return EOF; + data_write_pointer = data_buffer + status; + data_read_pointer = data_buffer; + } +#ifdef NOT_ASCII + { + char c = *data_read_pointer++; + + return FROMASCII(c); + } +#else + return UCH(*data_read_pointer++); +#endif /* NOT_ASCII */ +} + +/* Close an individual connection + * + */ +static int close_connection(connection * con) +{ + connection *scan; + int status; + + CTRACE((tfp, "HTFTP: Closing control socket %d\n", con->socket)); + status = NETCLOSE(con->socket); + if (TRACE && status != 0) { +#ifdef UNIX + CTRACE((tfp, "HTFTP:close_connection: %s", LYStrerror(errno))); +#else + if (con->socket != INVSOC) + HTInetStatus("HTFTP:close_connection"); +#endif + } + con->socket = -1; + if (connections == con) { + connections = con->next; + return status; + } + for (scan = connections; scan; scan = scan->next) { + if (scan->next == con) { + scan->next = con->next; /* Unlink */ + if (control == con) + control = (connection *) 0; + return status; + } /*if */ + } /* for */ + return -1; /* very strange -- was not on list. 
*/ +} + +static char *help_message_buffer = NULL; /* global :( */ + +static void init_help_message_cache(void) +{ + FREE(help_message_buffer); +} + +static void help_message_cache_add(char *string) +{ + if (help_message_buffer) + StrAllocCat(help_message_buffer, string); + else + StrAllocCopy(help_message_buffer, string); + + CTRACE((tfp, "Adding message to help cache: %s\n", string)); +} + +static char *help_message_cache_non_empty(void) +{ + return (help_message_buffer); +} + +static char *help_message_cache_contents(void) +{ + return (help_message_buffer); +} + +/* Send One Command + * ---------------- + * + * This function checks whether we have a control connection, and sends + * one command if given. + * + * On entry, + * control points to the connection which is established. + * cmd points to a command, or is zero to just get the response. + * + * The command should already be terminated with the CRLF pair. + * + * On exit, + * returns: 1 for success, + * or negative for communication failure (in which case + * the control connection will be closed). + */ +static int write_cmd(const char *cmd) +{ + int status; + + if (!control) { + CTRACE((tfp, "HTFTP: No control connection set up!!\n")); + return HT_NO_CONNECTION; + } + + if (cmd) { + CTRACE((tfp, " Tx: %s", cmd)); +#ifdef NOT_ASCII + { + char *p; + + for (p = cmd; *p; p++) { + *p = TOASCII(*p); + } + } +#endif /* NOT_ASCII */ + status = (int) NETWRITE(control->socket, cmd, (unsigned) strlen(cmd)); + if (status < 0) { + CTRACE((tfp, + "HTFTP: Error %d sending command: closing socket %d\n", + status, control->socket)); + close_connection(control); + return status; + } + } + return 1; +} + +/* + * For each string in the list, check if it is found in the response text. + * If so, return TRUE. 
+ */ +static BOOL find_response(HTList *list) +{ + BOOL result = FALSE; + HTList *p = list; + char *value; + + while ((value = (char *) HTList_nextObject(p)) != NULL) { + if (LYstrstr(response_text, value)) { + result = TRUE; + break; + } + } + return result; +} + +/* Execute Command and get Response + * -------------------------------- + * + * See the state machine illustrated in RFC959, p57. This implements + * one command/reply sequence. It also interprets lines which are to + * be continued, which are marked with a "-" immediately after the + * status code. + * + * Continuation then goes on until a line with a matching reply code + * an a space after it. + * + * On entry, + * control points to the connection which is established. + * cmd points to a command, or is zero to just get the response. + * + * The command must already be terminated with the CRLF pair. + * + * On exit, + * returns: The first digit of the reply type, + * or negative for communication failure. + */ +static int response(const char *cmd) +{ + int result; /* Three-digit decimal code */ + int continuation_response = -1; + int status; + + if ((status = write_cmd(cmd)) < 0) + return status; + + do { + char *p = response_text; + + for (;;) { + int ich = NEXT_CHAR; + + if (((*p++ = (char) ich) == LF) + || (p == &response_text[LINE_LENGTH])) { + + char continuation; + + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTFTP: Interrupted in HTGetCharacter, apparently.\n")); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + + *p = '\0'; /* Terminate the string */ + CTRACE((tfp, " Rx: %s", response_text)); + + /* Check for login or help messages */ + if (!StrNCmp(response_text, "230-", 4) || + !StrNCmp(response_text, "250-", 4) || + !StrNCmp(response_text, "220-", 4)) + help_message_cache_add(response_text + 4); + + sscanf(response_text, "%d%c", &result, &continuation); + if (continuation_response == -1) { + if (continuation == '-') /* start continuation */ + 
continuation_response = result; + } else { /* continuing */ + if (continuation_response == result && + continuation == ' ') + continuation_response = -1; /* ended */ + } + if (result == 220 && find_response(broken_ftp_retr)) { + Broken_RETR = TRUE; + CTRACE((tfp, "This server is broken (RETR)\n")); + } +#ifdef INET6 + if (result == 220 && find_response(broken_ftp_epsv)) { + Broken_EPSV = TRUE; + CTRACE((tfp, "This server is broken (EPSV)\n")); + } +#endif + break; + } + /* if end of line */ + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTFTP: Interrupted in HTGetCharacter, apparently.\n")); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + + if (ich == EOF) { + CTRACE((tfp, "Error on rx: closing socket %d\n", + control->socket)); + strcpy(response_text, "000 *** TCP read error on response\n"); + close_connection(control); + return -1; /* End of file on response */ + } + } /* Loop over characters */ + + } while (continuation_response != -1); + + if (result == 421) { + CTRACE((tfp, "HTFTP: They close so we close socket %d\n", + control->socket)); + close_connection(control); + return -1; + } + if ((result == 255 && server_type == CMS_SERVER) && + (0 == strncasecomp(cmd, "CWD", 3) || + 0 == strcasecomp(cmd, "CDUP"))) { + /* + * Alas, CMS returns 255 on failure to CWD to parent of root. - PG + */ + result = 555; + } + return result / 100; +} + +static int send_cmd_1(const char *verb) +{ + char command[80]; + + sprintf(command, "%.*s%c%c", (int) sizeof(command) - 4, verb, CR, LF); + return response(command); +} + +static int send_cmd_2(const char *verb, const char *param) +{ + char *command = 0; + int status; + + HTSprintf0(&command, "%s %s%c%c", verb, param, CR, LF); + status = response(command); + FREE(command); + + return status; +} + +#define send_cwd(path) send_cmd_2("CWD", path) + +/* + * This function should try to set the macintosh server into binary mode. 
Some + * servers need an additional letter after the MACB command. + */ +static int set_mac_binary(eServerType ServerType) +{ + /* try to set mac binary mode */ + if (ServerType == APPLESHARE_SERVER || + ServerType == NETPRESENZ_SERVER) { + /* + * Presumably E means "Enable". - KW + */ + return (2 == response("MACB E\r\n")); + } else { + return (2 == response("MACB\r\n")); + } +} + +/* This function gets the current working directory to help + * determine what kind of host it is + */ + +static void get_ftp_pwd(eServerType *ServerType, BOOLEAN *UseList) +{ + char *cp; + + /* get the working directory (to see what it looks like) */ + int status = response("PWD\r\n"); + + if (status < 0) { + return; + } else { + cp = strchr(response_text + 5, '"'); + if (cp) + *cp = '\0'; + if (*ServerType == TCPC_SERVER) { + *ServerType = ((response_text[5] == '/') ? + NCSA_SERVER : TCPC_SERVER); + CTRACE((tfp, "HTFTP: Treating as %s server.\n", + ((*ServerType == NCSA_SERVER) ? + "NCSA" : "TCPC"))); + } else if (response_text[5] == '/') { + /* path names beginning with / imply Unix, + * right? + */ + if (set_mac_binary(*ServerType)) { + *ServerType = NCSA_SERVER; + CTRACE((tfp, "HTFTP: Treating as NCSA server.\n")); + } else { + *ServerType = UNIX_SERVER; + *UseList = TRUE; + CTRACE((tfp, "HTFTP: Treating as Unix server.\n")); + } + return; + } else if (response_text[strlen(response_text) - 1] == ']') { + /* path names ending with ] imply VMS, right? */ + *ServerType = VMS_SERVER; + *UseList = TRUE; + CTRACE((tfp, "HTFTP: Treating as VMS server.\n")); + } else { + *ServerType = GENERIC_SERVER; + CTRACE((tfp, "HTFTP: Treating as Generic server.\n")); + } + + if ((*ServerType == NCSA_SERVER) || + (*ServerType == TCPC_SERVER) || + (*ServerType == PETER_LEWIS_SERVER) || + (*ServerType == NETPRESENZ_SERVER)) + set_mac_binary(*ServerType); + } +} + +/* This function turns MSDOS-like directory output off for + * Windows NT servers. 
+ */ + +static void set_unix_dirstyle(eServerType *ServerType, BOOLEAN *UseList) +{ + char *cp; + + /* This is a toggle. It seems we have to toggle in order to see + * the current state (after toggling), so we may end up toggling + * twice. - kw + */ + int status = response("SITE DIRSTYLE\r\n"); + + if (status != 2) { + *ServerType = GENERIC_SERVER; + CTRACE((tfp, "HTFTP: DIRSTYLE failed, treating as Generic server.\n")); + return; + } else { + *UseList = TRUE; + /* Expecting one of: + * 200 MSDOS-like directory output is off + * 200 MSDOS-like directory output is on + * The following code doesn't look for the full exact string - + * who knows how the wording may change in some future version. + * If the first response isn't recognized, we toggle again + * anyway, under the assumption that it's more likely that + * the MSDOS setting was "off" originally. - kw + */ + cp = strstr(response_text + 4, "MSDOS"); + if (cp && strstr(cp, " off")) { + return; /* already off now. */ + } else { + response("SITE DIRSTYLE\r\n"); + } + } +} + +/* Get a valid connection to the host + * ---------------------------------- + * + * On entry, + * arg points to the name of the host in a hypertext address + * On exit, + * returns <0 if error + * socket number if success + * + * This routine takes care of managing timed-out connections, and + * limiting the number of connections in use at any one time. + * + * It ensures that all connections are logged in if they exist. + * It ensures they have the port number transferred. + */ +static int get_connection(const char *arg, + HTParentAnchor *anchor) +{ + int status; + char *command = 0; + connection *con; + char *username = NULL; + char *password = NULL; + static BOOLEAN firstuse = TRUE; + + if (firstuse) { + /* + * Set up freeing at exit. 
- FM + */ +#ifdef LY_FIND_LEAKS + atexit(free_FTPGlobals); +#endif + firstuse = FALSE; + } + + if (control) { + /* + * Reuse this object - KW, DW & FM + */ + if (control->socket != -1) { + NETCLOSE(control->socket); + } + con = control; + con->addr = 0; + con->binary = FALSE; + } else { + /* + * Allocate and init control struct. + */ + con = typecalloc(connection); + if (con == NULL) + outofmem(__FILE__, "get_connection"); + + assert(con != NULL); + } + con->socket = -1; + + if (!arg) + return -1; /* Bad if no name specified */ + if (!*arg) + return -1; /* Bad if name had zero length */ + +/* Get node name: +*/ + CTRACE((tfp, "get_connection(%s)\n", arg)); + { + char *p1 = HTParse(arg, "", PARSE_HOST); + char *p2 = strrchr(p1, '@'); /* user? */ + char *pw = NULL; + + if (p2 != NULL) { + username = p1; + *p2 = '\0'; /* terminate */ + p1 = p2 + 1; /* point to host */ + pw = strchr(username, ':'); + if (pw != NULL) { + *pw++ = '\0'; + password = HTUnEscape(pw); + } + if (*username) + HTUnEscape(username); + + /* + * If the password doesn't exist then we are going to have to ask + * the user for it. The only problem is that we don't want to ask + * for it every time, so we will store away in a primitive fashion. + */ + if (!password) { + char *tmp = NULL; + + HTSprintf0(&tmp, "%s@%s", username, p1); + /* + * If the user@host is not equal to the last time through or + * user_entered_password has no data then we need to ask the + * user for the password. 
+ */ + if (!last_username_and_host || + strcmp(tmp, last_username_and_host) || + !user_entered_password) { + + StrAllocCopy(last_username_and_host, tmp); + HTSprintf0(&tmp, gettext("Enter password for user %s@%s:"), + username, p1); + FREE(user_entered_password); + user_entered_password = HTPromptPassword(tmp); + + } /* else we already know the password */ + password = user_entered_password; + FREE(tmp); + } + } + + if (!username) + FREE(p1); + } /* scope of p1 */ + + status = HTDoConnect(arg, "FTP", IPPORT_FTP, (int *) &con->socket); + + if (status < 0) { + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted on connect\n")); + } else { + CTRACE((tfp, "HTFTP: Unable to connect to remote host for `%s'.\n", + arg)); + } + if (status == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + status = HT_NOT_LOADED; + } else { + HTAlert(gettext("Unable to connect to FTP host.")); + } + if (con->socket != -1) { + NETCLOSE(con->socket); + } + + FREE(username); + if (control == con) + control = NULL; + FREE(con); + return status; /* Bad return */ + } + + CTRACE((tfp, "FTP connected, socket %d control %p\n", + con->socket, (void *) con)); + control = con; /* Current control connection */ + + /* Initialise buffering for control connection */ + HTInitInput(control->socket); + init_help_message_cache(); /* Clear the login message buffer. */ + +/* Now we log in Look up username, prompt for pw. 
+*/ + status = response((char *) 0); /* Get greeting */ + + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted at beginning of login.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + server_type = GENERIC_SERVER; /* reset */ + if (status == 2) { /* Send username */ + char *cp; /* look at greeting text */ + + /* don't gettext() this -- incoming text: */ + if (strlen(response_text) > 4) { + if ((cp = strstr(response_text, " awaits your command")) || + (cp = strstr(response_text, " ready."))) { + *cp = '\0'; + } + cp = response_text + 4; + if (!strncasecomp(cp, "NetPresenz", 10)) + server_type = NETPRESENZ_SERVER; + } else { + cp = response_text; + } + StrAllocCopy(anchor->server, cp); + + status = send_cmd_2("USER", (username && *username) + ? username + : "anonymous"); + + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted while sending username.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + } + if (status == 3) { /* Send password */ + if (password) { + /* + * We have non-zero length password, so send it. - FM + */ + HTSprintf0(&command, "PASS %s%c%c", password, CR, LF); + } else { + /* + * Create and send a mail address as the password. 
- FM + */ + const char *the_address; + char *user = NULL; + const char *host = NULL; + char *cp; + + the_address = anonftp_password; + if (isEmpty(the_address)) + the_address = personal_mail_address; + if (isEmpty(the_address)) + the_address = LYGetEnv("USER"); + if (isEmpty(the_address)) + the_address = "WWWuser"; + + StrAllocCopy(user, the_address); + if ((cp = strchr(user, '@')) != NULL) { + *cp++ = '\0'; + if (*cp == '\0') + host = HTHostName(); + else + host = cp; + } else { + host = HTHostName(); + } + + /* + * If host is not fully qualified, suppress it + * as ftp.uu.net prefers a blank to a bad name + */ + if (!(host) || strchr(host, '.') == NULL) + host = ""; + + HTSprintf0(&command, "PASS %s@%s%c%c", user, host, CR, LF); + FREE(user); + } + status = response(command); + FREE(command); + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted while sending password.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + } + FREE(username); + + if (status == 3) { + status = send_cmd_1("ACCT noaccount"); + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted while sending password.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + NETCLOSE(control->socket); + control->socket = -1; + return HT_INTERRUPTED; + } + + } + if (status != 2) { + CTRACE((tfp, "HTFTP: Login fail: %s", response_text)); + /* if (control->socket > 0) close_connection(control->socket); */ + return -1; /* Bad return */ + } + CTRACE((tfp, "HTFTP: Logged in.\n")); + + /* Check for host type */ + if (server_type != NETPRESENZ_SERVER) + server_type = GENERIC_SERVER; /* reset */ + use_list = FALSE; /* reset */ + if (response("SYST\r\n") == 2) { + /* we got a line -- what kind of server are we talking to? 
*/ + if (StrNCmp(response_text + 4, + "UNIX Type: L8 MAC-OS MachTen", 28) == 0) { + server_type = MACHTEN_SERVER; + use_list = TRUE; + CTRACE((tfp, "HTFTP: Treating as MachTen server.\n")); + + } else if (strstr(response_text + 4, "UNIX") != NULL || + strstr(response_text + 4, "Unix") != NULL) { + server_type = UNIX_SERVER; + unsure_type = FALSE; /* to the best of out knowledge... */ + use_list = TRUE; + CTRACE((tfp, "HTFTP: Treating as Unix server.\n")); + + } else if (strstr(response_text + 4, "MSDOS") != NULL) { + server_type = MSDOS_SERVER; + use_list = TRUE; + CTRACE((tfp, "HTFTP: Treating as MSDOS (Unix emulation) server.\n")); + + } else if (StrNCmp(response_text + 4, "VMS", 3) == 0) { + char *tilde = strstr(arg, "/~"); + + use_list = TRUE; + if (tilde != 0 + && tilde[2] != 0 + && strstr(response_text + 4, "MadGoat") != 0) { + server_type = UNIX_SERVER; + CTRACE((tfp, "HTFTP: Treating VMS as UNIX server.\n")); + } else { + server_type = VMS_SERVER; + CTRACE((tfp, "HTFTP: Treating as VMS server.\n")); + } + + } else if ((StrNCmp(response_text + 4, "VM/CMS", 6) == 0) || + (StrNCmp(response_text + 4, "VM ", 3) == 0)) { + server_type = CMS_SERVER; + use_list = TRUE; + CTRACE((tfp, "HTFTP: Treating as CMS server.\n")); + + } else if (StrNCmp(response_text + 4, "DCTS", 4) == 0) { + server_type = DCTS_SERVER; + CTRACE((tfp, "HTFTP: Treating as DCTS server.\n")); + + } else if (strstr(response_text + 4, "MAC-OS TCP/Connect II") != NULL) { + server_type = TCPC_SERVER; + CTRACE((tfp, "HTFTP: Looks like a TCPC server.\n")); + get_ftp_pwd(&server_type, &use_list); + unsure_type = TRUE; + + } else if (server_type == NETPRESENZ_SERVER) { /* already set above */ + use_list = TRUE; + set_mac_binary(server_type); + CTRACE((tfp, "HTFTP: Treating as NetPresenz (MACOS) server.\n")); + + } else if (StrNCmp(response_text + 4, "MACOS Peter's Server", 20) == 0) { + server_type = PETER_LEWIS_SERVER; + use_list = TRUE; + set_mac_binary(server_type); + CTRACE((tfp, "HTFTP: Treating as 
Peter Lewis (MACOS) server.\n")); + + } else if (StrNCmp(response_text + 4, "Windows_NT", 10) == 0) { + server_type = WINDOWS_NT_SERVER; + CTRACE((tfp, "HTFTP: Treating as Window_NT server.\n")); + set_unix_dirstyle(&server_type, &use_list); + + } else if (StrNCmp(response_text + 4, "Windows2000", 11) == 0) { + server_type = WINDOWS_2K_SERVER; + CTRACE((tfp, "HTFTP: Treating as Window_2K server.\n")); + set_unix_dirstyle(&server_type, &use_list); + + } else if (StrNCmp(response_text + 4, "MS Windows", 10) == 0) { + server_type = MS_WINDOWS_SERVER; + use_list = TRUE; + CTRACE((tfp, "HTFTP: Treating as MS Windows server.\n")); + + } else if (StrNCmp(response_text + 4, + "MACOS AppleShare IP FTP Server", 30) == 0) { + server_type = APPLESHARE_SERVER; + use_list = TRUE; + set_mac_binary(server_type); + CTRACE((tfp, "HTFTP: Treating as AppleShare server.\n")); + + } else { + server_type = GENERIC_SERVER; + CTRACE((tfp, "HTFTP: Ugh! A Generic server.\n")); + get_ftp_pwd(&server_type, &use_list); + unsure_type = TRUE; + } + } else { + /* SYST fails :( try to get the type from the PWD command */ + get_ftp_pwd(&server_type, &use_list); + } + +/* Now we inform the server of the port number we will listen on +*/ +#ifdef NOTREPEAT_PORT + { + int status = response(port_command); + + if (status != 2) { + if (control->socket) + close_connection(control->socket); + return -status; /* Bad return */ + } + CTRACE((tfp, "HTFTP: Port defined.\n")); + } +#endif /* NOTREPEAT_PORT */ + return con->socket; /* Good return */ +} + +static void reset_master_socket(void) +{ + have_socket = FALSE; +} + +static void set_master_socket(int value) +{ + have_socket = (BOOLEAN) (value >= 0); + if (have_socket) + master_socket = (unsigned) value; +} + +/* Close Master (listening) socket + * ------------------------------- + * + * + */ +static int close_master_socket(void) +{ + int status; + + if (have_socket) + FD_CLR(master_socket, &open_sockets); + + status = NETCLOSE((int) master_socket); + 
CTRACE((tfp, "HTFTP: Closed master socket %u\n", master_socket)); + + reset_master_socket(); + + if (status < 0) + return HTInetStatus(gettext("close master socket")); + else + return status; +} + +/* Open a master socket for listening on + * ------------------------------------- + * + * When data is transferred, we open a port, and wait for the server to + * connect with the data. + * + * On entry, + * have_socket Must be false, if master_socket is not setup already + * master_socket Must be negative if not set up already. + * On exit, + * Returns socket number if good + * less than zero if error. + * master_socket is socket number if good, else negative. + * port_number is valid if good. + */ +static int get_listen_socket(void) +{ +#ifdef INET6 + struct sockaddr_storage soc_address; /* Binary network address */ + struct sockaddr_in *soc_in = (struct sockaddr_in *) &soc_address; + int af; + LY_SOCKLEN slen; + +#else + struct sockaddr_in soc_address; /* Binary network address */ + struct sockaddr_in *soc_in = &soc_address; +#endif /* INET6 */ + int new_socket; /* Will be master_socket */ + + FD_ZERO(&open_sockets); /* Clear our record of open sockets */ + num_sockets = 0; + +#ifndef REPEAT_LISTEN + if (have_socket) + return master_socket; /* Done already */ +#endif /* !REPEAT_LISTEN */ + +#ifdef INET6 + /* query address family of control connection */ + slen = (LY_SOCKLEN) sizeof(soc_address); + if (getsockname(control->socket, (struct sockaddr *) &soc_address, + &slen) < 0) { + return HTInetStatus("getsockname failed"); + } + af = ((struct sockaddr *) &soc_address)->sa_family; + memset(&soc_address, 0, sizeof(soc_address)); +#endif /* INET6 */ + +/* Create internet socket +*/ +#ifdef INET6 + new_socket = socket(af, SOCK_STREAM, IPPROTO_TCP); +#else + new_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); +#endif /* INET6 */ + + if (new_socket < 0) + return HTInetStatus(gettext("socket for master socket")); + + CTRACE((tfp, "HTFTP: Opened master socket number 
%d\n", new_socket)); + +/* Search for a free port. +*/ +#ifdef INET6 + memset(&soc_address, 0, sizeof(soc_address)); + ((struct sockaddr *) &soc_address)->sa_family = af; + switch (af) { + case AF_INET: +#ifdef SIN6_LEN + ((struct sockaddr *) &soc_address)->sa_len = sizeof(struct sockaddr_in); +#endif /* SIN6_LEN */ + break; + case AF_INET6: +#ifdef SIN6_LEN + ((struct sockaddr *) &soc_address)->sa_len = sizeof(struct sockaddr_in6); +#endif /* SIN6_LEN */ + break; + default: + HTInetStatus("AF"); + } +#else + soc_in->sin_family = AF_INET; /* Family = internet, host order */ + soc_in->sin_addr.s_addr = INADDR_ANY; /* Any peer address */ +#endif /* INET6 */ +#ifdef POLL_PORTS + { + PortNumber old_port_number = port_number; + + for (port_number = (old_port_number + 1);; port_number++) { + int status; + + if (port_number > LAST_TCP_PORT) + port_number = FIRST_TCP_PORT; + if (port_number == old_port_number) { + return HTInetStatus("bind"); + } +#ifdef INET6 + soc_in->sin_port = htons(port_number); +#else + soc_address.sin_port = htons(port_number); +#endif /* INET6 */ +#ifdef SOCKS + if (socks_flag) + if ((status = Rbind(new_socket, + (struct sockaddr *) &soc_address, + /* Cast to generic sockaddr */ + SOCKADDR_LEN(soc_address) +#ifndef SHORTENED_RBIND + ,socks_bind_remoteAddr +#endif /* !SHORTENED_RBIND */ + )) == 0) { + break; + } else +#endif /* SOCKS */ + if ((status = bind(new_socket, + (struct sockaddr *) &soc_address, + /* Cast to generic sockaddr */ + SOCKADDR_LEN(soc_address) + )) == 0) { + break; + } + CTRACE((tfp, "TCP bind attempt to port %d yields %d, errno=%d\n", + port_number, status, SOCKET_ERRNO)); + } /* for */ + } +#else + { + int status; + LY_SOCKLEN address_length = (LY_SOCKLEN) sizeof(soc_address); + +#ifdef SOCKS + if (socks_flag) + status = Rgetsockname(control->socket, + (struct sockaddr *) &soc_address, + &address_length); + else +#endif /* SOCKS */ + status = getsockname(control->socket, + (struct sockaddr *) &soc_address, + &address_length); 
+ if (status < 0) + return HTInetStatus("getsockname"); +#ifdef INET6 + CTRACE((tfp, "HTFTP: This host is %s\n", + HTInetString((void *) soc_in))); + + soc_in->sin_port = 0; /* Unspecified: please allocate */ +#else + CTRACE((tfp, "HTFTP: This host is %s\n", + HTInetString(soc_in))); + + soc_address.sin_port = 0; /* Unspecified: please allocate */ +#endif /* INET6 */ +#ifdef SOCKS + if (socks_flag) + status = Rbind(new_socket, + (struct sockaddr *) &soc_address, + /* Cast to generic sockaddr */ + sizeof(soc_address) +#ifndef SHORTENED_RBIND + ,socks_bind_remoteAddr +#endif /* !SHORTENED_RBIND */ + ); + else +#endif /* SOCKS */ + status = bind(new_socket, + (struct sockaddr *) &soc_address, + /* Cast to generic sockaddr */ + SOCKADDR_LEN(soc_address) + ); + if (status < 0) + return HTInetStatus("bind"); + + address_length = sizeof(soc_address); +#ifdef SOCKS + if (socks_flag) + status = Rgetsockname(new_socket, + (struct sockaddr *) &soc_address, + &address_length); + else +#endif /* SOCKS */ + status = getsockname(new_socket, + (struct sockaddr *) &soc_address, + &address_length); + if (status < 0) + return HTInetStatus("getsockname"); + } +#endif /* POLL_PORTS */ + +#ifdef INET6 + CTRACE((tfp, "HTFTP: bound to port %d on %s\n", + (int) ntohs(soc_in->sin_port), + HTInetString((void *) soc_in))); +#else + CTRACE((tfp, "HTFTP: bound to port %d on %s\n", + (int) ntohs(soc_in->sin_port), + HTInetString(soc_in))); +#endif /* INET6 */ + +#ifdef REPEAT_LISTEN + if (have_socket) + (void) close_master_socket(); +#endif /* REPEAT_LISTEN */ + + set_master_socket(new_socket); + +/* Now we must find out who we are to tell the other guy +*/ + (void) HTHostName(); /* Make address valid - doesn't work */ +#ifdef INET6 + switch (((struct sockaddr *) &soc_address)->sa_family) { + case AF_INET: +#endif /* INET6 */ + sprintf(port_command, "PORT %d,%d,%d,%d,%d,%d%c%c", + (int) *((unsigned char *) (&soc_in->sin_addr) + 0), + (int) *((unsigned char *) (&soc_in->sin_addr) + 1), + (int) 
*((unsigned char *) (&soc_in->sin_addr) + 2), + (int) *((unsigned char *) (&soc_in->sin_addr) + 3), + (int) *((unsigned char *) (&soc_in->sin_port) + 0), + (int) *((unsigned char *) (&soc_in->sin_port) + 1), + CR, LF); + +#ifdef INET6 + break; + + case AF_INET6: + { + char hostbuf[MAXHOSTNAMELEN]; + char portbuf[MAXHOSTNAMELEN]; + + getnameinfo((struct sockaddr *) &soc_address, + SOCKADDR_LEN(soc_address), + hostbuf, + (socklen_t) sizeof(hostbuf), + portbuf, + (socklen_t) sizeof(portbuf), + NI_NUMERICHOST | NI_NUMERICSERV); + sprintf(port_command, "EPRT |%d|%s|%s|%c%c", 2, hostbuf, portbuf, + CR, LF); + break; + } + default: + sprintf(port_command, "JUNK%c%c", CR, LF); + break; + } +#endif /* INET6 */ + + /* Inform TCP that we will accept connections + */ + { + int status; + +#ifdef SOCKS + if (socks_flag) + status = Rlisten((int) master_socket, 1); + else +#endif /* SOCKS */ + status = listen((int) master_socket, 1); + if (status < 0) { + reset_master_socket(); + return HTInetStatus("listen"); + } + } + CTRACE((tfp, "TCP: Master socket(), bind() and listen() all OK\n")); + FD_SET(master_socket, &open_sockets); + if ((master_socket + 1) > num_sockets) + num_sockets = master_socket + 1; + + return (int) master_socket; /* Good */ + +} /* get_listen_socket */ + +static const char *months[12] = +{ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +/* Procedure: Set the current and last year strings and date integer + * ----------------------------------------------------------------- + * + * Bug: + * This code is for sorting listings by date, if that option + * is selected in Lynx, and doesn't take into account time + * zones or ensure resetting at midnight, so the sort may not + * be perfect, but the actual date isn't changed in the display, + * i.e., the date is still correct. 
- FM + */ +static void set_years_and_date(void) +{ + char day[8], month[8], date[12]; + time_t NowTime; + int i; + + NowTime = time(NULL); + StrNCpy(day, (char *) ctime(&NowTime) + 8, 2); + day[2] = '\0'; + if (day[0] == ' ') { + day[0] = '0'; + } + StrNCpy(month, (char *) ctime(&NowTime) + 4, 3); + month[3] = '\0'; + for (i = 0; i < 12; i++) { + if (!strcasecomp(month, months[i])) { + break; + } + } + i++; + sprintf(date, "9999%02d%.2s", i, day); + TheDate = atoi(date); + strcpy(ThisYear, (char *) ctime(&NowTime) + 20); + ThisYear[4] = '\0'; + sprintf(LastYear, "%d", (atoi(ThisYear) - 1)); + HaveYears = TRUE; +} + +typedef struct _EntryInfo { + char *filename; + char *linkname; /* symbolic link, if any */ + char *type; + char *date; + unsigned long size; + BOOLEAN display; /* show this entry? */ +#ifdef LONG_LIST + unsigned long file_links; + char *file_mode; + char *file_user; + char *file_group; +#endif +} EntryInfo; + +static void free_entryinfo_struct_contents(EntryInfo *entry_info) +{ + if (entry_info) { + FREE(entry_info->filename); + FREE(entry_info->linkname); + FREE(entry_info->type); + FREE(entry_info->date); + } + /* dont free the struct */ +} + +/* + * is_ls_date() -- + * Return TRUE if s points to a string of the form: + * "Sep 1 1990 " or + * "Sep 11 11:59 " or + * "Dec 12 1989 " or + * "FCv 23 1990 " ... 
+ */ +static BOOLEAN is_ls_date(char *s) +{ + /* must start with three alpha characters */ + if (!isalpha(UCH(*s++)) || !isalpha(UCH(*s++)) || !isalpha(UCH(*s++))) + return FALSE; + + /* space or HT_NON_BREAK_SPACE */ + if (!(*s == ' ' || *s == HT_NON_BREAK_SPACE)) { + return FALSE; + } + s++; + + /* space or digit */ + if (!(*s == ' ' || isdigit(UCH(*s)))) { + return FALSE; + } + s++; + + /* digit */ + if (!isdigit(UCH(*s++))) + return FALSE; + + /* space */ + if (*s++ != ' ') + return FALSE; + + /* space or digit */ + if (!(*s == ' ' || isdigit(UCH(*s)))) { + return FALSE; + } + s++; + + /* digit */ + if (!isdigit(UCH(*s++))) + return FALSE; + + /* colon or digit */ + if (!(*s == ':' || isdigit(UCH(*s)))) { + return FALSE; + } + s++; + + /* digit */ + if (!isdigit(UCH(*s++))) + return FALSE; + + /* space or digit */ + if (!(*s == ' ' || isdigit(UCH(*s)))) { + return FALSE; + } + s++; + + /* space */ + if (*s != ' ') + return FALSE; + + return TRUE; +} /* is_ls_date() */ + +/* + * Extract the name, size, and date from an EPLF line. 
- 08-06-96 DJB + */ +static void parse_eplf_line(char *line, + EntryInfo *info) +{ + char *cp = line; + char ct[26]; + unsigned long size; + time_t secs; + static time_t base; /* time() value on this OS in 1970 */ + static int flagbase = 0; + + if (!flagbase) { + struct tm t; + + t.tm_year = 70; + t.tm_mon = 0; + t.tm_mday = 0; + t.tm_hour = 0; + t.tm_min = 0; + t.tm_sec = 0; + t.tm_isdst = -1; + base = mktime(&t); /* could return -1 */ + flagbase = 1; + } + + while (*cp) { + switch (*cp) { + case '\t': + StrAllocCopy(info->filename, cp + 1); + return; + case 's': + size = 0; + while (*(++cp) && (*cp != ',')) + size = (size * 10) + (unsigned long) (*cp - '0'); + info->size = size; + break; + case 'm': + secs = 0; + while (*(++cp) && (*cp != ',')) + secs = (secs * 10) + (*cp - '0'); + secs += base; /* assumes that time_t is #seconds */ + strcpy(ct, ctime(&secs)); + ct[24] = 0; + StrAllocCopy(info->date, ct); + break; + case '/': + StrAllocCopy(info->type, ENTRY_IS_DIRECTORY); + /* FALLTHRU */ + default: + while (*cp) { + if (*cp++ == ',') + break; + } + break; + } + } +} /* parse_eplf_line */ + +/* + * Extract the name, size, and date from an ls -l line. 
+ */ +static void parse_ls_line(char *line, + EntryInfo *entry) +{ +#ifdef LONG_LIST + char *next; + char *cp; +#endif + int i, j; + unsigned long base = 1; + unsigned long size_num = 0; + + for (i = (int) strlen(line) - 1; + (i > 13) && (!isspace(UCH(line[i])) || !is_ls_date(&line[i - 12])); + i--) { + ; /* null body */ + } + line[i] = '\0'; + if (i > 13) { + StrAllocCopy(entry->date, &line[i - 12]); + /* replace the 4th location with nbsp if it is a space or zero */ + if (entry->date[4] == ' ' || entry->date[4] == '0') + entry->date[4] = HT_NON_BREAK_SPACE; + /* make sure year or time is flush right */ + if (entry->date[11] == ' ') { + for (j = 11; j > 6; j--) { + entry->date[j] = entry->date[j - 1]; + } + } + } + j = i - 14; + while (isdigit(UCH(line[j]))) { + size_num += ((unsigned long) (line[j] - '0') * base); + base *= 10; + j--; + } + entry->size = size_num; + StrAllocCopy(entry->filename, &line[i + 1]); + +#ifdef LONG_LIST + line[j] = '\0'; + + /* + * Extract the file-permissions, as a string. + */ + if ((cp = strchr(line, ' ')) != 0) { + if ((cp - line) == 10) { + *cp = '\0'; + StrAllocCopy(entry->file_mode, line); + *cp = ' '; + } + + /* + * Next is the link-count. + */ + next = 0; + entry->file_links = (unsigned long) strtol(cp, &next, 10); + if (next == 0 || *next != ' ') { + entry->file_links = 0; + next = cp; + } else { + cp = next; + } + /* + * Next is the user-name. + */ + while (isspace(UCH(*cp))) + ++cp; + if ((next = strchr(cp, ' ')) != 0) + *next = '\0'; + if (*cp != '\0') + StrAllocCopy(entry->file_user, cp); + /* + * Next is the group-name (perhaps). + */ + if (next != NULL) { + cp = (next + 1); + while (isspace(UCH(*cp))) + ++cp; + if ((next = strchr(cp, ' ')) != 0) + *next = '\0'; + if (*cp != '\0') + StrAllocCopy(entry->file_group, cp); + } + } +#endif +} + +/* + * Extract the name and size info and whether it refers to a directory from a + * LIST line in "dls" format. 
+ */ +static void parse_dls_line(char *line, + EntryInfo *entry_info, + char **pspilledname) +{ + short j; + int base = 1; + int size_num = 0; + int len; + char *cps = NULL; + + /* README 763 Information about this server\0 + bin/ - \0 + etc/ = \0 + ls-lR 0 \0 + ls-lR.Z 3 \0 + pub/ = Public area\0 + usr/ - \0 + morgan 14 -> ../real/morgan\0 + TIMIT.mostlikely.Z\0 + 79215 \0 + */ + + len = (int) strlen(line); + if (len == 0) { + FREE(*pspilledname); + entry_info->display = FALSE; + return; + } + cps = LYSkipNonBlanks(line); + if (*cps == '\0') { /* only a filename, save it and return. */ + StrAllocCopy(*pspilledname, line); + entry_info->display = FALSE; + return; + } + if (len < 24 || line[23] != ' ' || + (isspace(UCH(line[0])) && !*pspilledname)) { + /* this isn't the expected "dls" format! */ + if (!isspace(UCH(line[0]))) + *cps = '\0'; + if (*pspilledname && !*line) { + entry_info->filename = *pspilledname; + *pspilledname = NULL; + if (entry_info->filename[strlen(entry_info->filename) - 1] == '/') + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + else + StrAllocCopy(entry_info->type, ""); + } else { + StrAllocCopy(entry_info->filename, line); + if (cps && cps != line && *(cps - 1) == '/') + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + else + StrAllocCopy(entry_info->type, ""); + FREE(*pspilledname); + } + return; + } + + j = 22; + if (line[j] == '=' || line[j] == '-') { + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + } else { + while (isdigit(UCH(line[j]))) { + size_num += (line[j] - '0') * base; + base *= 10; + j--; + } + } + entry_info->size = (unsigned long) size_num; + + cps = LYSkipBlanks(&line[23]); + if (!StrNCmp(cps, "-> ", 3) && cps[3] != '\0' && cps[3] != ' ') { + StrAllocCopy(entry_info->type, ENTRY_IS_SYMBOLIC_LINK); + StrAllocCopy(entry_info->linkname, LYSkipBlanks(cps + 3)); + entry_info->size = 0; /* don't display size */ + } + + if (j > 0) + line[j] = '\0'; + + LYTrimTrailing(line); + + len = (int) strlen(line); + if (len 
== 0 && *pspilledname && **pspilledname) { + line = *pspilledname; + len = (int) strlen(*pspilledname); + } + if (len > 0 && line[len - 1] == '/') { + /* + * It's a dir, remove / and mark it as such. + */ + if (len > 1) + line[len - 1] = '\0'; + if (!entry_info->type) + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + } + + StrAllocCopy(entry_info->filename, line); + FREE(*pspilledname); +} /* parse_dls_line() */ + +/* + * parse_vms_dir_entry() + * Format the name, date, and size from a VMS LIST line + * into the EntryInfo structure - FM + */ +static void parse_vms_dir_entry(char *line, + EntryInfo *entry_info) +{ + int i, j; + unsigned int ialloc; + char *cp, *cpd, *cps, date[16]; + const char *sp = " "; + + /* Get rid of blank lines, and information lines. Valid lines have the ';' + * version number token. + */ + if (!strlen(line) || (cp = strchr(line, ';')) == NULL) { + entry_info->display = FALSE; + return; + } + + /* Cut out file or directory name at VMS version number. */ + *cp++ = '\0'; + StrAllocCopy(entry_info->filename, line); + + /* Cast VMS non-README file and directory names to lowercase. */ + if (strstr(entry_info->filename, "READ") == NULL) { + LYLowerCase(entry_info->filename); + i = (int) strlen(entry_info->filename); + } else { + i = (int) ((strstr(entry_info->filename, "READ") + - entry_info->filename) + + 4); + if (!StrNCmp(&entry_info->filename[i], "ME", 2)) { + i += 2; + while (entry_info->filename[i] && entry_info->filename[i] != '.') { + i++; + } + } else if (!StrNCmp(&entry_info->filename[i], ".ME", 3)) { + i = (int) strlen(entry_info->filename); + } else { + i = 0; + } + LYLowerCase(entry_info->filename + i); + } + + /* Uppercase terminal .z's or _z's. */ + if ((--i > 2) && + entry_info->filename[i] == 'z' && + (entry_info->filename[i - 1] == '.' || + entry_info->filename[i - 1] == '_')) + entry_info->filename[i] = 'Z'; + + /* Convert any tabs in rest of line to spaces. 
*/ + cps = cp - 1; + while ((cps = strchr(cps + 1, '\t')) != NULL) + *cps = ' '; + + /* Collapse serial spaces. */ + i = 0; + j = 1; + cps = cp; + while (cps[j] != '\0') { + if (cps[i] == ' ' && cps[j] == ' ') + j++; + else + cps[++i] = cps[j++]; + } + cps[++i] = '\0'; + + /* Set the years and date, if we don't have them yet. * */ + if (!HaveYears) { + set_years_and_date(); + } + + /* Track down the date. */ + if ((cpd = strchr(cp, '-')) != NULL && + strlen(cpd) > 9 && isdigit(UCH(*(cpd - 1))) && + isalpha(UCH(*(cpd + 1))) && *(cpd + 4) == '-') { + + /* Month */ + *(cpd + 2) = (char) TOLOWER(*(cpd + 2)); + *(cpd + 3) = (char) TOLOWER(*(cpd + 3)); + sprintf(date, "%.3s ", cpd + 1); + + /* Day */ + if (isdigit(UCH(*(cpd - 2)))) + sprintf(date + 4, "%.2s ", cpd - 2); + else + sprintf(date + 4, "%c%.1s ", HT_NON_BREAK_SPACE, cpd - 1); + + /* Time or Year */ + if (!StrNCmp(ThisYear, cpd + 5, 4) && + strlen(cpd) > 15 && *(cpd + 12) == ':') { + sprintf(date + 7, "%.5s", cpd + 10); + } else { + sprintf(date + 7, " %.4s", cpd + 5); + } + + StrAllocCopy(entry_info->date, date); + } + + /* Track down the size */ + if ((cpd = strchr(cp, '/')) != NULL) { + /* Appears be in used/allocated format */ + cps = cpd; + while (isdigit(UCH(*(cps - 1)))) + cps--; + if (cps < cpd) + *cpd = '\0'; + entry_info->size = (unsigned long) atol(cps); + cps = cpd + 1; + while (isdigit(UCH(*cps))) + cps++; + *cps = '\0'; + ialloc = (unsigned) atoi(cpd + 1); + /* Check if used is in blocks or bytes */ + if (entry_info->size <= ialloc) + entry_info->size *= 512; + + } else if (strtok(cp, sp) != NULL) { + /* We just initialized on the version number */ + /* Now let's hunt for a lone, size number */ + while ((cps = strtok(NULL, sp)) != NULL) { + cpd = cps; + while (isdigit(UCH(*cpd))) + cpd++; + if (*cpd == '\0') { + /* Assume it's blocks */ + entry_info->size = ((unsigned long) atol(cps) * 512); + break; + } + } + } + + /* Wrap it up */ + CTRACE((tfp, "HTFTP: VMS filename: %s date: %s size: %lu\n", + 
entry_info->filename, + NonNull(entry_info->date), + entry_info->size)); + return; +} /* parse_vms_dir_entry() */ + +/* + * parse_ms_windows_dir_entry() -- + * Format the name, date, and size from an MS_WINDOWS LIST line into + * the EntryInfo structure (assumes Chameleon NEWT format). - FM + */ +static void parse_ms_windows_dir_entry(char *line, + EntryInfo *entry_info) +{ + char *cp = line; + char *cps, *cpd, date[16]; + char *end = line + strlen(line); + + /* Get rid of blank or junk lines. */ + cp = LYSkipBlanks(cp); + if (!(*cp)) { + entry_info->display = FALSE; + return; + } + + /* Cut out file or directory name. */ + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + cpd = cps; + StrAllocCopy(entry_info->filename, cp); + + /* Track down the size */ + if (cps < end) { + cps = LYSkipBlanks(cps); + cpd = LYSkipNonBlanks(cps); + *cpd++ = '\0'; + if (isdigit(UCH(*cps))) { + entry_info->size = (unsigned long) atol(cps); + } else { + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + } + } else { + StrAllocCopy(entry_info->type, ""); + } + + /* Set the years and date, if we don't have them yet. * */ + if (!HaveYears) { + set_years_and_date(); + } + + /* Track down the date. 
*/ + if (cpd < end) { + cpd = LYSkipBlanks(cpd); + if (strlen(cpd) > 17) { + *(cpd + 6) = '\0'; /* Month and Day */ + *(cpd + 11) = '\0'; /* Year */ + *(cpd + 17) = '\0'; /* Time */ + if (strcmp(ThisYear, cpd + 7)) + /* Not this year, so show the year */ + sprintf(date, "%.6s %.4s", cpd, (cpd + 7)); + else + /* Is this year, so show the time */ + sprintf(date, "%.6s %.5s", cpd, (cpd + 12)); + StrAllocCopy(entry_info->date, date); + if (entry_info->date[4] == ' ' || entry_info->date[4] == '0') { + entry_info->date[4] = HT_NON_BREAK_SPACE; + } + } + } + + /* Wrap it up */ + CTRACE((tfp, "HTFTP: MS Windows filename: %s date: %s size: %lu\n", + entry_info->filename, + NonNull(entry_info->date), + entry_info->size)); + return; +} /* parse_ms_windows_dir_entry */ + +/* + * parse_windows_nt_dir_entry() -- + * Format the name, date, and size from a WINDOWS_NT LIST line into + * the EntryInfo structure (assumes Chameleon NEWT format). - FM + */ +#ifdef NOTDEFINED +static void parse_windows_nt_dir_entry(char *line, + EntryInfo *entry_info) +{ + char *cp = line; + char *cps, *cpd, date[16]; + char *end = line + strlen(line); + int i; + + /* Get rid of blank or junk lines. */ + cp = LYSkipBlanks(cp); + if (!(*cp)) { + entry_info->display = FALSE; + return; + } + + /* Cut out file or directory name. */ + cpd = cp; + cps = LYSkipNonBlanks(end - 1); + cp = (cps + 1); + if (!strcmp(cp, ".") || !strcmp(cp, "..")) { + entry_info->display = FALSE; + return; + } + StrAllocCopy(entry_info->filename, cp); + if (cps < cpd) + return; + *cp = '\0'; + end = cp; + + /* Set the years and date, if we don't have them yet. * */ + if (!HaveYears) { + set_years_and_date(); + } + + /* Cut out the date. 
*/ + cp = cps = cpd; + cps = LYSkipNonBlanks(cps); + *cps++ = '\0'; + if (cps > end) { + entry_info->display = FALSE; + return; + } + cps = LYSkipBlanks(cps); + cpd = LYSkipNonBlanks(cps); + *cps++ = '\0'; + if (cps > end || cpd == cps || strlen(cpd) < 7) { + entry_info->display = FALSE; + return; + } + if (strlen(cp) == 8 && + isdigit(*cp) && isdigit(*(cp + 1)) && *(cp + 2) == '-' && + isdigit(*(cp + 3)) && isdigit(*(cp + 4)) && *(cp + 5) == '-') { + *(cp + 2) = '\0'; /* Month */ + i = atoi(cp) - 1; + *(cp + 5) = '\0'; /* Day */ + sprintf(date, "%.3s %.2s", months[i], (cp + 3)); + if (date[4] == '0') + date[4] = ' '; + cp += 6; /* Year */ + if (strcmp((ThisYear + 2), cp)) { + /* Not this year, so show the year */ + if (atoi(cp) < 70) { + sprintf(&date[6], " 20%.2s", cp); + } else { + sprintf(&date[6], " 19%.2s", cp); + } + } else { + /* Is this year, so show the time */ + *(cpd + 2) = '\0'; /* Hour */ + i = atoi(cpd); + if (*(cpd + 5) == 'P' || *(cpd + 5) == 'p') + i += 12; + sprintf(&date[6], " %02d:%.2s", i, (cpd + 3)); + } + StrAllocCopy(entry_info->date, date); + if (entry_info->date[4] == ' ' || entry_info->date[4] == '0') { + entry_info->date[4] = HT_NON_BREAK_SPACE; + } + } + + /* Track down the size */ + if (cps < end) { + cps = LYSkipBlanks(cps); + cpd = LYSkipNonBlanks(cps); + *cpd = '\0'; + if (isdigit(*cps)) { + entry_info->size = atol(cps); + } else { + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + } + } else { + StrAllocCopy(entry_info->type, ""); + } + + /* Wrap it up */ + CTRACE((tfp, "HTFTP: Windows NT filename: %s date: %s size: %d\n", + entry_info->filename, + NonNull(entry_info->date), + entry_info->size)); + return; +} /* parse_windows_nt_dir_entry */ +#endif /* NOTDEFINED */ + +/* + * parse_cms_dir_entry() -- + * Format the name, date, and size from a VM/CMS line into + * the EntryInfo structure. 
- FM + */ +static void parse_cms_dir_entry(char *line, + EntryInfo *entry_info) +{ + char *cp = line; + char *cps, *cpd, date[16]; + char *end = line + strlen(line); + int RecordLength = 0; + int Records = 0; + int i; + + /* Get rid of blank or junk lines. */ + cp = LYSkipBlanks(cp); + if (!(*cp)) { + entry_info->display = FALSE; + return; + } + + /* Cut out file or directory name. */ + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + StrAllocCopy(entry_info->filename, cp); + if (strchr(entry_info->filename, '.') != NULL) + /* If we already have a dot, we did an NLST. */ + return; + cp = LYSkipBlanks(cps); + if (!(*cp)) { + /* If we don't have more, we've misparsed. */ + FREE(entry_info->filename); + FREE(entry_info->type); + entry_info->display = FALSE; + return; + } + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + if ((0 == strcasecomp(cp, "DIR")) && (cp - line) > 17) { + /* It's an SFS directory. */ + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + entry_info->size = 0; + } else { + /* It's a file. */ + cp--; + *cp = '.'; + StrAllocCat(entry_info->filename, cp); + + /* Track down the VM/CMS RECFM or type. */ + cp = cps; + if (cp < end) { + cp = LYSkipBlanks(cp); + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + /* Check cp here, if it's relevant someday. */ + } + } + + /* Track down the record length or dash. */ + cp = cps; + if (cp < end) { + cp = LYSkipBlanks(cp); + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + if (isdigit(UCH(*cp))) { + RecordLength = atoi(cp); + } + } + + /* Track down the number of records or the dash. */ + cp = cps; + if (cps < end) { + cp = LYSkipBlanks(cp); + cps = LYSkipNonBlanks(cp); + *cps++ = '\0'; + if (isdigit(UCH(*cp))) { + Records = atoi(cp); + } + if (Records > 0 && RecordLength > 0) { + /* Compute an approximate size. */ + entry_info->size = ((unsigned long) Records * (unsigned long) RecordLength); + } + } + + /* Set the years and date, if we don't have them yet. 
*/ + if (!HaveYears) { + set_years_and_date(); + } + + /* Track down the date. */ + cpd = cps; + if (((cps < end) && + (cps = strchr(cpd, ':')) != NULL) && + (cps < (end - 3) && + isdigit(UCH(*(cps + 1))) && isdigit(UCH(*(cps + 2))) && *(cps + 3) == ':')) { + cps += 3; + *cps = '\0'; + if ((cps - cpd) >= 14) { + cpd = (cps - 14); + *(cpd + 2) = '\0'; /* Month */ + *(cpd + 5) = '\0'; /* Day */ + *(cpd + 8) = '\0'; /* Year */ + cps -= 5; /* Time */ + if (*cpd == ' ') + *cpd = '0'; + i = atoi(cpd) - 1; + sprintf(date, "%.3s %.2s", months[i], (cpd + 3)); + if (date[4] == '0') + date[4] = ' '; + cpd += 6; /* Year */ + if (strcmp((ThisYear + 2), cpd)) { + /* Not this year, so show the year. */ + if (atoi(cpd) < 70) { + sprintf(&date[6], " 20%.2s", cpd); + } else { + sprintf(&date[6], " 19%.2s", cpd); + } + } else { + /* Is this year, so show the time. */ + *(cps + 2) = '\0'; /* Hour */ + i = atoi(cps); + sprintf(&date[6], " %02d:%.2s", i, (cps + 3)); + } + StrAllocCopy(entry_info->date, date); + if (entry_info->date[4] == ' ' || entry_info->date[4] == '0') { + entry_info->date[4] = HT_NON_BREAK_SPACE; + } + } + } + + /* Wrap it up. */ + CTRACE((tfp, "HTFTP: VM/CMS filename: %s date: %s size: %lu\n", + entry_info->filename, + NonNull(entry_info->date), + entry_info->size)); + return; +} /* parse_cms_dir_entry */ + +/* + * Given a line of LIST/NLST output in entry, return results and a file/dir + * name in entry_info struct + * + * If first is true, this is the first name in a directory. + */ +static EntryInfo *parse_dir_entry(char *entry, + BOOLEAN *first, + char **pspilledname) +{ + EntryInfo *entry_info; + int i; + int len; + BOOLEAN remove_size = FALSE; + char *cp; + + entry_info = typecalloc(EntryInfo); + + if (entry_info == NULL) + outofmem(__FILE__, "parse_dir_entry"); + + assert(entry_info != NULL); + + entry_info->display = TRUE; + + switch (server_type) { + case DLS_SERVER: + + /* + * Interpret and edit LIST output from a Unix server in "dls" format. 
+ * This one must have claimed to be Unix in order to get here; if the + * first line looks fishy, we revert to Unix and hope that fits better + * (this recovery is untested). - kw + */ + + if (*first) { + len = (int) strlen(entry); + if (!len || entry[0] == ' ' || + (len >= 24 && entry[23] != ' ') || + (len < 24 && strchr(entry, ' '))) { + server_type = UNIX_SERVER; + CTRACE((tfp, + "HTFTP: Falling back to treating as Unix server.\n")); + } else { + *first = FALSE; + } + } + + if (server_type == DLS_SERVER) { + /* if still unchanged... */ + parse_dls_line(entry, entry_info, pspilledname); + + if (!entry_info->filename || *entry_info->filename == '\0') { + entry_info->display = FALSE; + return (entry_info); + } + if (!strcmp(entry_info->filename, "..") || + !strcmp(entry_info->filename, ".")) + entry_info->display = FALSE; + if (entry_info->type && *entry_info->type == '\0') { + FREE(entry_info->type); + return (entry_info); + } + /* + * Goto the bottom and get real type. + */ + break; + } + /* fall through if server_type changed for *first == TRUE ! */ + case UNIX_SERVER: + case PETER_LEWIS_SERVER: + case MACHTEN_SERVER: + case MSDOS_SERVER: + case WINDOWS_NT_SERVER: + case WINDOWS_2K_SERVER: + case APPLESHARE_SERVER: + case NETPRESENZ_SERVER: + /* + * Check for EPLF output (local times). + */ + if (*entry == '+') { + parse_eplf_line(entry, entry_info); + break; + } + + /* + * Interpret and edit LIST output from Unix server. + */ + len = (int) strlen(entry); + if (*first) { + /* don't gettext() this -- incoming text: */ + if (!strcmp(entry, "can not access directory .")) { + /* + * Don't reset *first, nothing real will follow. - KW + */ + entry_info->display = FALSE; + return (entry_info); + } + *first = FALSE; + if (!StrNCmp(entry, "total ", 6) || + strstr(entry, "not available") != NULL) { + entry_info->display = FALSE; + return (entry_info); + } else if (unsure_type) { + /* this isn't really a unix server! 
*/ + server_type = GENERIC_SERVER; + entry_info->display = FALSE; + return (entry_info); + } + } + + /* + * Check first character of ls -l output. + */ + if (TOUPPER(entry[0]) == 'D') { + /* + * It's a directory. + */ + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + remove_size = TRUE; /* size is not useful */ + } else if (entry[0] == 'l') { + /* + * It's a symbolic link, does the user care about knowing if it is + * symbolic? I think so since it might be a directory. + */ + StrAllocCopy(entry_info->type, ENTRY_IS_SYMBOLIC_LINK); + remove_size = TRUE; /* size is not useful */ + + /* + * Strip off " -> pathname". + */ + for (i = len - 1; (i > 3) && + (!isspace(UCH(entry[i])) || + (entry[i - 1] != '>') || + (entry[i - 2] != '-') || + (entry[i - 3] != ' ')); i--) ; /* null body */ + if (i > 3) { + entry[i - 3] = '\0'; + StrAllocCopy(entry_info->linkname, LYSkipBlanks(entry + i)); + } + } + /* link */ + parse_ls_line(entry, entry_info); + + if (!strcmp(entry_info->filename, "..") || + !strcmp(entry_info->filename, ".")) + entry_info->display = FALSE; + /* + * Goto the bottom and get real type. + */ + break; + + case VMS_SERVER: + /* + * Interpret and edit LIST output from VMS server and convert + * information lines to zero length. + */ + parse_vms_dir_entry(entry, entry_info); + + /* + * Get rid of any junk lines. + */ + if (!entry_info->display) + return (entry_info); + + /* + * Trim off VMS directory extensions. + */ + len = (int) strlen(entry_info->filename); + if ((len > 4) && !strcmp(&entry_info->filename[len - 4], ".dir")) { + entry_info->filename[len - 4] = '\0'; + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + remove_size = TRUE; /* size is not useful */ + } + /* + * Goto the bottom and get real type. + */ + break; + + case MS_WINDOWS_SERVER: + /* + * Interpret and edit LIST output from MS_WINDOWS server and convert + * information lines to zero length. + */ + parse_ms_windows_dir_entry(entry, entry_info); + + /* + * Get rid of any junk lines. 
+ */ + if (!entry_info->display) + return (entry_info); + if (entry_info->type && *entry_info->type == '\0') { + FREE(entry_info->type); + return (entry_info); + } + /* + * Goto the bottom and get real type. + */ + break; + +#ifdef NOTDEFINED + case WINDOWS_NT_SERVER: + /* + * Interpret and edit LIST output from MS_WINDOWS server and convert + * information lines to zero length. + */ + parse_windows_nt_dir_entry(entry, entry_info); + + /* + * Get rid of any junk lines. + */ + if (!entry_info->display) + return (entry_info); + if (entry_info->type && *entry_info->type == '\0') { + FREE(entry_info->type); + return (entry_info); + } + /* + * Goto the bottom and get real type. + */ + break; +#endif /* NOTDEFINED */ + + case CMS_SERVER: + { + /* + * Interpret and edit LIST output from VM/CMS server and convert + * any information lines to zero length. + */ + parse_cms_dir_entry(entry, entry_info); + + /* + * Get rid of any junk lines. + */ + if (!entry_info->display) + return (entry_info); + if (entry_info->type && *entry_info->type == '\0') { + FREE(entry_info->type); + return (entry_info); + } + /* + * Goto the bottom and get real type. + */ + break; + } + + case NCSA_SERVER: + case TCPC_SERVER: + /* + * Directories identified by trailing "/" characters. + */ + StrAllocCopy(entry_info->filename, entry); + len = (int) strlen(entry); + if (entry[len - 1] == '/') { + /* + * It's a dir, remove / and mark it as such. + */ + entry[len - 1] = '\0'; + StrAllocCopy(entry_info->type, ENTRY_IS_DIRECTORY); + remove_size = TRUE; /* size is not useful */ + } + /* + * Goto the bottom and get real type. + */ + break; + + default: + /* + * We can't tell if it is a directory since we only did an NLST :( List + * bad file types anyways? NOT! 
+ */ + StrAllocCopy(entry_info->filename, entry); + return (entry_info); /* mostly empty info */ + + } /* switch (server_type) */ + +#ifdef LONG_LIST + (void) remove_size; +#else + if (remove_size && entry_info->size) { + entry_info->size = 0; + } +#endif + + if (entry_info->filename && strlen(entry_info->filename) > 3) { + if (((cp = strrchr(entry_info->filename, '.')) != NULL && + 0 == strncasecomp(cp, ".me", 3)) && + (cp[3] == '\0' || cp[3] == ';')) { + /* + * Don't treat this as application/x-Troff-me if it's a Unix server + * but has the string "read.me", or if it's not a Unix server. - + * FM + */ + if ((server_type != UNIX_SERVER) || + (cp > (entry_info->filename + 3) && + 0 == strncasecomp((cp - 4), "read.me", 7))) { + StrAllocCopy(entry_info->type, "text/plain"); + } + } + } + + /* + * Get real types eventually. + */ + if (!entry_info->type) { + const char *cp2; + HTFormat format; + HTAtom *encoding; /* @@ not used at all */ + + format = HTFileFormat(entry_info->filename, &encoding, &cp2); + + if (cp2 == NULL) { + if (!StrNCmp(HTAtom_name(format), "application", 11)) { + cp2 = HTAtom_name(format) + 12; + if (!StrNCmp(cp2, "x-", 2)) + cp2 += 2; + } else { + cp2 = HTAtom_name(format); + } + } + + StrAllocCopy(entry_info->type, cp2); + } + + return (entry_info); +} + +static int compare_EntryInfo_structs(EntryInfo *entry1, EntryInfo *entry2) +{ + int i, status; + char date1[16], date2[16], time1[8], time2[8], month[4]; + + switch (HTfileSortMethod) { + case FILE_BY_SIZE: + /* both equal or both 0 */ + if (entry1->size == entry2->size) + return (strcmp(entry1->filename, entry2->filename)); + else if (entry1->size > entry2->size) + return (1); + else + return (-1); + + case FILE_BY_TYPE: + if (entry1->type && entry2->type) { + status = strcasecomp(entry1->type, entry2->type); + if (status) + return (status); + /* else fall to filename comparison */ + } + return (strcmp(entry1->filename, entry2->filename)); + + case FILE_BY_DATE: + if (entry1->date && 
entry2->date) { + /* + * Make sure we have the correct length. - FM + */ + if (strlen(entry1->date) != 12 || strlen(entry2->date) != 12) { + return (strcmp(entry1->filename, entry2->filename)); + } + /* + * Set the years and date, + * if we don't have them yet. + */ + if (!HaveYears) { + set_years_and_date(); + } + /* + * Set up for sorting in reverse + * chronological order. - FM + */ + if (entry1->date[9] == ':') { + strcpy(date1, "9999"); + strcpy(time1, &entry1->date[7]); + if (time1[0] == ' ') { + time1[0] = '0'; + } + } else { + strcpy(date1, &entry1->date[8]); + strcpy(time1, "00:00"); + } + StrNCpy(month, entry1->date, 3); + month[3] = '\0'; + for (i = 0; i < 12; i++) { + if (!strcasecomp(month, months[i])) { + break; + } + } + i++; + sprintf(month, "%02d", i); + strcat(date1, month); + StrNCat(date1, &entry1->date[4], 2); + date1[8] = '\0'; + if (date1[6] == ' ' || date1[6] == HT_NON_BREAK_SPACE) { + date1[6] = '0'; + } + /* If no year given, assume last year if it would otherwise be in + * the future by more than one day. The one day tolerance is to + * account for a possible timezone difference. - kw + */ + if (date1[0] == '9' && atoi(date1) > TheDate + 1) { + for (i = 0; i < 4; i++) { + date1[i] = LastYear[i]; + } + } + strcat(date1, time1); + if (entry2->date[9] == ':') { + strcpy(date2, "9999"); + strcpy(time2, &entry2->date[7]); + if (time2[0] == ' ') { + time2[0] = '0'; + } + } else { + strcpy(date2, &entry2->date[8]); + strcpy(time2, "00:00"); + } + StrNCpy(month, entry2->date, 3); + month[3] = '\0'; + for (i = 0; i < 12; i++) { + if (!strcasecomp(month, months[i])) { + break; + } + } + i++; + sprintf(month, "%02d", i); + strcat(date2, month); + StrNCat(date2, &entry2->date[4], 2); + date2[8] = '\0'; + if (date2[6] == ' ' || date2[6] == HT_NON_BREAK_SPACE) { + date2[6] = '0'; + } + /* If no year given, assume last year if it would otherwise be in + * the future by more than one day. 
The one day tolerance is to + * account for a possible timezone difference. - kw + */ + if (date2[0] == '9' && atoi(date2) > TheDate + 1) { + for (i = 0; i < 4; i++) { + date2[i] = LastYear[i]; + } + } + strcat(date2, time2); + /* + * Do the comparison. - FM + */ + status = strcasecomp(date2, date1); + if (status) + return (status); + /* else fall to filename comparison */ + } + return (strcmp(entry1->filename, entry2->filename)); + + case FILE_BY_NAME: + default: + return (strcmp(entry1->filename, entry2->filename)); + } +} + +#ifdef LONG_LIST +static char *FormatStr(char **bufp, + char *start, + const char *value) +{ + char fmt[512]; + + if (*start) { + sprintf(fmt, "%%%.*ss", (int) sizeof(fmt) - 3, start); + HTSprintf(bufp, fmt, value); + } else if (*bufp && !(value && *value)) { + ; + } else if (value) { + StrAllocCat(*bufp, value); + } + return *bufp; +} + +static char *FormatNum(char **bufp, + char *start, + unsigned long value) +{ + char fmt[512]; + + if (*start) { + sprintf(fmt, "%%%.*sld", (int) sizeof(fmt) - 3, start); + HTSprintf(bufp, fmt, value); + } else { + sprintf(fmt, "%lu", value); + StrAllocCat(*bufp, fmt); + } + return *bufp; +} + +static void FlushParse(HTStructured * target, char **buf) +{ + if (*buf && **buf) { + PUTS(*buf); + **buf = '\0'; + } +} + +static void LYListFmtParse(const char *fmtstr, + EntryInfo *data, + HTStructured * target, + char *tail) +{ + char c; + char *s; + char *end; + char *start; + char *str = NULL; + char *buf = NULL; + BOOL is_directory = (BOOL) (data->file_mode != 0 && + (TOUPPER(data->file_mode[0]) == 'D')); + BOOL is_symlinked = (BOOL) (data->file_mode != 0 && + (TOUPPER(data->file_mode[0]) == 'L')); + BOOL remove_size = (BOOL) (is_directory || is_symlinked); + + StrAllocCopy(str, fmtstr); + s = str; + end = str + strlen(str); + while (*s) { + start = s; + while (*s) { + if (*s == '%') { + if (*(s + 1) == '%') /* literal % */ + s++; + else + break; + } + s++; + } + /* s is positioned either at a % or at \0 */ + 
*s = '\0'; + if (s > start) { /* some literal chars. */ + StrAllocCat(buf, start); + } + if (s == end) + break; + start = ++s; + while (isdigit(UCH(*s)) || *s == '.' || *s == '-' || *s == ' ' || + *s == '#' || *s == '+' || *s == '\'') + s++; + c = *s; /* the format char. or \0 */ + *s = '\0'; + + switch (c) { + case '\0': + StrAllocCat(buf, start); + continue; + + case 'A': + case 'a': /* anchor */ + FlushParse(target, &buf); + HTDirEntry(target, tail, data->filename); + FormatStr(&buf, start, data->filename); + PUTS(buf); + END(HTML_A); + *buf = '\0'; + if (c != 'A' && data->linkname != 0) { + PUTS(" -> "); + PUTS(data->linkname); + } + break; + + case 'T': /* MIME type */ + case 't': /* MIME type description */ + if (is_directory) { + if (c != 'T') { + FormatStr(&buf, start, ENTRY_IS_DIRECTORY); + } else { + FormatStr(&buf, start, ""); + } + } else if (is_symlinked) { + if (c != 'T') { + FormatStr(&buf, start, ENTRY_IS_SYMBOLIC_LINK); + } else { + FormatStr(&buf, start, ""); + } + } else { + const char *cp2; + HTFormat format; + + format = HTFileFormat(data->filename, NULL, &cp2); + + if (c != 'T') { + if (cp2 == NULL) { + if (!StrNCmp(HTAtom_name(format), + "application", 11)) { + cp2 = HTAtom_name(format) + 12; + if (!StrNCmp(cp2, "x-", 2)) + cp2 += 2; + } else { + cp2 = HTAtom_name(format); + } + } + FormatStr(&buf, start, cp2); + } else { + FormatStr(&buf, start, HTAtom_name(format)); + } + } + break; + + case 'd': /* date */ + if (data->date) { + FormatStr(&buf, start, data->date); + } else { + FormatStr(&buf, start, " * "); + } + break; + + case 's': /* size in bytes */ + FormatNum(&buf, start, data->size); + break; + + case 'K': /* size in Kilobytes but not for directories */ + if (remove_size) { + FormatStr(&buf, start, ""); + StrAllocCat(buf, " "); + break; + } + /* FALL THROUGH */ + case 'k': /* size in Kilobytes */ + /* FIXME - this is inconsistent with HTFile.c, but historical */ + if (data->size < 1024) { + FormatNum(&buf, start, data->size); + 
StrAllocCat(buf, " bytes"); + } else { + FormatNum(&buf, start, data->size / 1024); + StrAllocCat(buf, "Kb"); + } + break; + +#ifdef LONG_LIST + case 'p': /* unix-style permission bits */ + FormatStr(&buf, start, NonNull(data->file_mode)); + break; + + case 'o': /* owner */ + FormatStr(&buf, start, NonNull(data->file_user)); + break; + + case 'g': /* group */ + FormatStr(&buf, start, NonNull(data->file_group)); + break; + + case 'l': /* link count */ + FormatNum(&buf, start, data->file_links); + break; +#endif + + case '%': /* literal % with flags/width */ + FormatStr(&buf, start, "%"); + break; + + default: + fprintf(stderr, + "Unknown format character `%c' in list format\n", c); + break; + } + + s++; + } + if (buf) { + LYTrimTrailing(buf); + FlushParse(target, &buf); + FREE(buf); + } + PUTC('\n'); + FREE(str); +} +#endif /* LONG_LIST */ +/* Read a directory into an hypertext object from the data socket + * -------------------------------------------------------------- + * + * On entry, + * anchor Parent anchor to link the this node to + * address Address of the directory + * On exit, + * returns HT_LOADED if OK + * <0 if error. + */ +static int read_directory(HTParentAnchor *parent, + const char *address, + HTFormat format_out, + HTStream *sink) +{ + int status; + BOOLEAN WasInterrupted = FALSE; + HTStructured *target = HTML_new(parent, format_out, sink); + char *filename = HTParse(address, "", PARSE_PATH + PARSE_PUNCTUATION); + EntryInfo *entry_info; + BOOLEAN first = TRUE; + char *lastpath = NULL; /* prefix for link, either "" (for root) or xxx */ + BOOL tildeIsTop = FALSE; + +#ifndef LONG_LIST + char string_buffer[64]; +#endif + + _HTProgress(gettext("Receiving FTP directory.")); + + /* + * Force the current Date and Year (TheDate, ThisYear, and LastYear) to be + * recalculated for each directory request. Otherwise we have a problem + * with long-running sessions assuming the wrong date for today. 
- kw + */ + HaveYears = FALSE; + /* + * Check whether we always want the home directory treated as Welcome. - + * FM + */ + if (server_type == VMS_SERVER) + tildeIsTop = TRUE; + + /* + * This should always come back FALSE, since the flag is set only for local + * directory listings if LONG_LIST was defined on compilation, but we could + * someday set up an equivalent listing for Unix ftp servers. - FM + */ + (void) HTDirTitles(target, parent, format_out, tildeIsTop); + + data_read_pointer = data_write_pointer = data_buffer; + + if (*filename == '\0') { /* Empty filename: use root. */ + StrAllocCopy(lastpath, "/"); + } else if (!strcmp(filename, "/")) { /* Root path. */ + StrAllocCopy(lastpath, "/foo/.."); + } else { + char *p = strrchr(filename, '/'); /* Find the lastslash. */ + char *cp; + + if (server_type == CMS_SERVER) { + StrAllocCopy(lastpath, filename); /* Use absolute path for CMS. */ + } else { + StrAllocCopy(lastpath, p + 1); /* Take slash off the beginning. */ + } + if ((cp = strrchr(lastpath, ';')) != NULL) { /* Trim type= param. 
*/ + if (!strncasecomp((cp + 1), "type=", 5)) { + if (TOUPPER(*(cp + 6)) == 'D' || + TOUPPER(*(cp + 6)) == 'A' || + TOUPPER(*(cp + 6)) == 'I') + *cp = '\0'; + } + } + } + FREE(filename); + + { + HTBTree *bt = HTBTree_new((HTComparer) compare_EntryInfo_structs); + int ic; + HTChunk *chunk = HTChunkCreate(128); + int BytesReceived = 0; + int BytesReported = 0; + char NumBytes[64]; + char *spilledname = NULL; + + PUTC('\n'); /* prettier LJM */ + for (ic = 0; ic != EOF;) { /* For each entry in the directory */ + HTChunkClear(chunk); + + if (HTCheckForInterrupt()) { + CTRACE((tfp, + "read_directory: interrupted after %d bytes\n", + BytesReceived)); + WasInterrupted = TRUE; + if (BytesReceived) { + goto unload_btree; /* unload btree */ + } else { + ABORT_TARGET; + HTBTreeAndObject_free(bt); + FREE(spilledname); + return HT_INTERRUPTED; + } + } + + /* read directory entry + */ + interrupted_in_next_data_char = FALSE; + for (;;) { /* Read in one line as filename */ + ic = NEXT_DATA_CHAR; + AgainForMultiNet: + if (interrupted_in_next_data_char) { + CTRACE((tfp, + "read_directory: interrupted_in_next_data_char after %d bytes\n", + BytesReceived)); + WasInterrupted = TRUE; + if (BytesReceived) { + goto unload_btree; /* unload btree */ + } else { + ABORT_TARGET; + HTBTreeAndObject_free(bt); + FREE(spilledname); + return HT_INTERRUPTED; + } + } else if ((char) ic == CR || (char) ic == LF) { /* Terminator? */ + if (chunk->size != 0) { /* got some text */ + /* Deal with MultiNet's wrapping of long lines */ + if (server_type == VMS_SERVER) { + /* Deal with MultiNet's wrapping of long lines - F.M. 
*/ + if (data_read_pointer < data_write_pointer && + *(data_read_pointer + 1) == ' ') + data_read_pointer++; + else if (data_read_pointer >= data_write_pointer) { + status = NETREAD(data_soc, data_buffer, + DATA_BUFFER_SIZE); + if (status == HT_INTERRUPTED) { + interrupted_in_next_data_char = 1; + goto AgainForMultiNet; + } + if (status <= 0) { + ic = EOF; + break; + } + data_write_pointer = data_buffer + status; + data_read_pointer = data_buffer; + if (*data_read_pointer == ' ') + data_read_pointer++; + else + break; + } else + break; + } else + break; /* finish getting one entry */ + } + } else if (ic == EOF) { + break; /* End of file */ + } else { + HTChunkPutc(chunk, UCH(ic)); + } + } + HTChunkTerminate(chunk); + + BytesReceived += chunk->size; + if (BytesReceived > BytesReported + 1024) { +#ifdef _WINDOWS + sprintf(NumBytes, gettext("Transferred %d bytes (%5d)"), + BytesReceived, ws_read_per_sec); +#else + sprintf(NumBytes, TRANSFERRED_X_BYTES, BytesReceived); +#endif + HTProgress(NumBytes); + BytesReported = BytesReceived; + } + + if (ic == EOF && chunk->size == 1) + /* 1 means empty: includes terminating 0 */ + break; + CTRACE((tfp, "HTFTP: Line in %s is %s\n", + lastpath, chunk->data)); + + entry_info = parse_dir_entry(chunk->data, &first, &spilledname); + if (entry_info->display) { + FREE(spilledname); + CTRACE((tfp, "Adding file to BTree: %s\n", + entry_info->filename)); + HTBTree_add(bt, entry_info); + } else { + free_entryinfo_struct_contents(entry_info); + FREE(entry_info); + } + + } /* next entry */ + + unload_btree: + + HTChunkFree(chunk); + FREE(spilledname); + + /* print out the handy help message if it exists :) */ + if (help_message_cache_non_empty()) { + START(HTML_PRE); + START(HTML_HR); + PUTC('\n'); + PUTS(help_message_cache_contents()); + init_help_message_cache(); /* to free memory */ + START(HTML_HR); + PUTC('\n'); + } else { + START(HTML_PRE); + PUTC('\n'); + } + + /* Run through tree printing out in order + */ + { +#ifndef LONG_LIST 
+#ifdef SH_EX /* 1997/10/18 (Sat) 14:14:28 */ + char *p, name_buff[256]; + int name_len, dot_len; + +#define FNAME_WIDTH 30 +#define FILE_GAP 1 + +#endif + int i; +#endif + HTBTElement *ele; + + for (ele = HTBTree_next(bt, NULL); + ele != NULL; + ele = HTBTree_next(bt, ele)) { + entry_info = (EntryInfo *) HTBTree_object(ele); + +#ifdef LONG_LIST + LYListFmtParse(ftp_format, + entry_info, + target, + lastpath); +#else + if (entry_info->date) { + PUTS(entry_info->date); + PUTS(" "); + } else { + PUTS(" * "); + } + + if (entry_info->type) { + for (i = 0; entry_info->type[i] != '\0' && i < 16; i++) + PUTC(entry_info->type[i]); + for (; i < 17; i++) + PUTC(' '); + } + /* start the anchor */ + HTDirEntry(target, lastpath, entry_info->filename); +#ifdef SH_EX /* 1997/10/18 (Sat) 16:00 */ + name_len = strlen(entry_info->filename); + + sprintf(name_buff, "%-*s", FNAME_WIDTH, entry_info->filename); + + if (name_len < FNAME_WIDTH) { + dot_len = FNAME_WIDTH - FILE_GAP - name_len; + if (dot_len > 0) { + p = name_buff + name_len + 1; + while (dot_len-- > 0) + *p++ = '.'; + } + } else { + name_buff[FNAME_WIDTH] = '\0'; + } + + PUTS(name_buff); +#else + PUTS(entry_info->filename); +#endif + END(HTML_A); + + if (entry_info->size) { +#ifdef SH_EX /* 1998/02/02 (Mon) 16:34:52 */ + if (entry_info->size < 1024) + sprintf(string_buffer, "%6ld bytes", + entry_info->size); + else + sprintf(string_buffer, "%6ld Kb", + entry_info->size / 1024); +#else + if (entry_info->size < 1024) + sprintf(string_buffer, " %lu bytes", + entry_info->size); + else + sprintf(string_buffer, " %luKb", + entry_info->size / 1024); +#endif + PUTS(string_buffer); + } else if (entry_info->linkname != 0) { + PUTS(" -> "); + PUTS(entry_info->linkname); + } + + PUTC('\n'); /* end of this entry */ +#endif + + free_entryinfo_struct_contents(entry_info); + } + } + END(HTML_PRE); + END(HTML_BODY); + FREE_TARGET; + HTBTreeAndObject_free(bt); + } + + FREE(lastpath); + + if (WasInterrupted || data_soc != -1) { /* should 
always be true */ + /* + * Without closing the data socket first, the response(0) later may + * hang. Some servers expect the client to fin/ack the close of the + * data connection before proceeding with the conversation on the + * control connection. - kw + */ + CTRACE((tfp, "HTFTP: Closing data socket %d\n", data_soc)); + status = NETCLOSE(data_soc); + if (status == -1) + HTInetStatus("close"); /* Comment only */ + data_soc = -1; + } + + if (WasInterrupted || HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + } + return HT_LOADED; +} + +/* + * Setup an FTP connection. + */ +static int setup_connection(const char *name, + HTParentAnchor *anchor) +{ + int retry; /* How many times tried? */ + int status = HT_NO_CONNECTION; + + CTRACE((tfp, "setup_connection(%s)\n", name)); + + /* set use_list to NOT since we don't know what kind of server + * this is yet. And set the type to GENERIC + */ + use_list = FALSE; + server_type = GENERIC_SERVER; + Broken_RETR = FALSE; + +#ifdef INET6 + Broken_EPSV = FALSE; +#endif + + for (retry = 0; retry < 2; retry++) { /* For timed out/broken connections */ + status = get_connection(name, anchor); + if (status < 0) { + break; + } + + if (!ftp_local_passive) { + status = get_listen_socket(); + if (status < 0) { + NETCLOSE(control->socket); + control->socket = -1; +#ifdef INET6 + if (have_socket) + (void) close_master_socket(); +#else + close_master_socket(); +#endif /* INET6 */ + /* HT_INTERRUPTED would fall through, if we could interrupt + somehow in the middle of it, which we currently can't. 
*/ + break; + } +#ifdef REPEAT_PORT + /* Inform the server of the port number we will listen on + */ + status = response(port_command); + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFTP: Interrupted in response (port_command)\n")); + _HTProgress(CONNECTION_INTERRUPTED); + NETCLOSE(control->socket); + control->socket = -1; + close_master_socket(); + status = HT_INTERRUPTED; + break; + } + if (status != 2) { /* Could have timed out */ + if (status < 0) + continue; /* try again - net error */ + status = -status; /* bad reply */ + break; + } + CTRACE((tfp, "HTFTP: Port defined.\n")); +#endif /* REPEAT_PORT */ + } else { /* Tell the server to be passive */ + char *command = NULL; + const char *p = "?"; + int h0, h1, h2, h3, p0, p1; /* Parts of reply */ + +#ifdef INET6 + char dst[LINE_LENGTH + 1]; +#endif + + data_soc = status; + +#ifdef INET6 + /* see RFC 2428 */ + if (Broken_EPSV) + status = 1; + else + status = send_cmd_1(p = "EPSV"); + if (status < 0) /* retry or Bad return */ + continue; + else if (status != 2) { + status = send_cmd_1(p = "PASV"); + if (status < 0) { /* retry or Bad return */ + continue; + } else if (status != 2) { + status = -status; /* bad reply */ + break; + } + } + + if (strcmp(p, "PASV") == 0) { + for (p = response_text; *p && *p != ','; p++) { + ; /* null body */ + } + + while (--p > response_text && '0' <= *p && *p <= '9') { + ; /* null body */ + } + status = sscanf(p + 1, "%d,%d,%d,%d,%d,%d", + &h0, &h1, &h2, &h3, &p0, &p1); + if (status < 4) { + fprintf(tfp, "HTFTP: PASV reply has no inet address!\n"); + status = HT_NO_CONNECTION; + break; + } + passive_port = (PortNumber) ((p0 << 8) + p1); + sprintf(dst, "%d.%d.%d.%d", h0, h1, h2, h3); + } else if (strcmp(p, "EPSV") == 0) { + char c0, c1, c2, c3; + struct sockaddr_storage ss; + LY_SOCKLEN sslen; + + /* + * EPSV bla (|||port|) + */ + for (p = response_text; *p && !isspace(UCH(*p)); p++) { + ; /* null body */ + } + for ( /*nothing */ ; + *p && *p && *p != '('; + p++) { /*) */ + ; /* 
null body */ + } + status = sscanf(p, "(%c%c%c%d%c)", &c0, &c1, &c2, &p0, &c3); + if (status != 5) { + fprintf(tfp, "HTFTP: EPSV reply has invalid format!\n"); + status = HT_NO_CONNECTION; + break; + } + passive_port = (PortNumber) p0; + + sslen = (LY_SOCKLEN) sizeof(ss); + if (getpeername(control->socket, (struct sockaddr *) &ss, + &sslen) < 0) { + fprintf(tfp, "HTFTP: getpeername(control) failed\n"); + status = HT_NO_CONNECTION; + break; + } + if (getnameinfo((struct sockaddr *) &ss, + sslen, + dst, + (socklen_t) sizeof(dst), + NULL, 0, NI_NUMERICHOST)) { + fprintf(tfp, "HTFTP: getnameinfo failed\n"); + status = HT_NO_CONNECTION; + break; + } + } +#else + status = send_cmd_1("PASV"); + if (status != 2) { + if (status < 0) + continue; /* retry or Bad return */ + status = -status; /* bad reply */ + break; + } + for (p = response_text; *p && *p != ','; p++) { + ; /* null body */ + } + + while (--p > response_text && '0' <= *p && *p <= '9') { + ; /* null body */ + } + + status = sscanf(p + 1, "%d,%d,%d,%d,%d,%d", + &h0, &h1, &h2, &h3, &p0, &p1); + if (status < 4) { + fprintf(tfp, "HTFTP: PASV reply has no inet address!\n"); + status = HT_NO_CONNECTION; + break; + } + passive_port = (PortNumber) ((p0 << 8) + p1); +#endif /* INET6 */ + CTRACE((tfp, "HTFTP: Server is listening on port %d\n", + passive_port)); + + /* Open connection for data: */ + +#ifdef INET6 + HTSprintf0(&command, "%s//%s:%d/", STR_FTP_URL, dst, passive_port); +#else + HTSprintf0(&command, "%s//%d.%d.%d.%d:%d/", + STR_FTP_URL, h0, h1, h2, h3, passive_port); +#endif + status = HTDoConnect(command, "FTP data", passive_port, &data_soc); + FREE(command); + + if (status < 0) { + (void) HTInetStatus(gettext("connect for data")); + NETCLOSE(data_soc); + break; + } + + CTRACE((tfp, "FTP data connected, socket %d\n", data_soc)); + } + status = 0; + break; /* No more retries */ + + } /* for retries */ + CTRACE((tfp, "setup_connection returns %d\n", status)); + return status; +} + +/* Retrieve File from Server + 
* ------------------------- + * + * On entry, + * name WWW address of a file: document, including hostname + * On exit, + * returns Socket number for file if good. + * <0 if bad. + */ +int HTFTPLoad(const char *name, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + BOOL isDirectory = NO; + HTAtom *encoding = NULL; + int status, final_status; + int outstanding = 1; /* outstanding control connection responses + + that we are willing to wait for, if we + get to the point of reading data - kw */ + HTFormat format; + + CTRACE((tfp, "HTFTPLoad(%s) %s connection\n", + name, + (ftp_local_passive + ? "passive" + : "normal"))); + + HTReadProgress((off_t) 0, (off_t) 0); + + status = setup_connection(name, anchor); + if (status < 0) + return status; /* Failed with this code */ + + /* Ask for the file: + */ + { + char *filename = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); + char *fname = filename; /* Save for subsequent free() */ + char *vmsname = NULL; + BOOL binary; + const char *type = NULL; + char *types = NULL; + char *cp; + + if (server_type == CMS_SERVER) { + /* If the unescaped path has a %2f, reject it as illegal. - FM */ + if (((cp = strstr(filename, "%2")) != NULL) && + TOUPPER(cp[2]) == 'F') { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + CTRACE((tfp, + "HTFTP: Rejecting path due to illegal escaped slash.\n")); + return -1; + } + } + + if (!*filename) { + StrAllocCopy(filename, "/"); + type = "D"; + } else if ((type = types = strrchr(filename, ';')) != NULL) { + /* + * Check and trim the type= parameter. 
- FM + */ + if (!strncasecomp((type + 1), "type=", 5)) { + switch (TOUPPER(*(type + 6))) { + case 'D': + *types = '\0'; + type = "D"; + break; + case 'A': + *types = '\0'; + type = "A"; + break; + case 'I': + *types = '\0'; + type = "I"; + break; + default: + type = ""; + break; + } + if (!*filename) { + *filename = '/'; + *(filename + 1) = '\0'; + } + } + if (*type != '\0') { + CTRACE((tfp, "HTFTP: type=%s\n", type)); + } + } + HTUnEscape(filename); + CTRACE((tfp, "HTFTP: UnEscaped %s\n", filename)); + if (filename[1] == '~') { + /* + * Check if translation of HOME as tilde is supported, + * and adjust filename if so. - FM + */ + char *cp2 = NULL; + char *fn = NULL; + + if ((cp2 = strchr((filename + 1), '/')) != NULL) { + *cp2 = '\0'; + } + status = send_cmd_1("PWD"); + if (status == 2 && response_text[5] == '/') { + status = send_cwd(filename + 1); + if (status == 2) { + StrAllocCopy(fn, (filename + 1)); + if (cp2) { + *cp2 = '/'; + if (fn[strlen(fn) - 1] != '/') { + StrAllocCat(fn, cp2); + } else { + StrAllocCat(fn, (cp2 + 1)); + } + cp2 = NULL; + } + FREE(fname); + fname = filename = fn; + } + } + if (cp2) { + *cp2 = '/'; + } + } + if (strlen(filename) > 3) { + char *cp2; + + if (((cp2 = strrchr(filename, '.')) != NULL && + 0 == strncasecomp(cp2, ".me", 3)) && + (cp2[3] == '\0' || cp2[3] == ';')) { + /* + * Don't treat this as application/x-Troff-me if it's a Unix + * server but has the string "read.me", or if it's not a Unix + * server. 
- FM + */ + if ((server_type != UNIX_SERVER) || + (cp2 > (filename + 3) && + 0 == strncasecomp((cp2 - 4), "read.me", 7))) { + *cp2 = '\0'; + format = HTFileFormat(filename, &encoding, NULL); + *cp2 = '.'; + } else { + format = HTFileFormat(filename, &encoding, NULL); + } + } else { + format = HTFileFormat(filename, &encoding, NULL); + } + } else { + format = HTFileFormat(filename, &encoding, NULL); + } + format = HTCharsetFormat(format, anchor, -1); + binary = (BOOL) (encoding != HTAtom_for("8bit") && + encoding != HTAtom_for("7bit")); + if (!binary && + /* + * Force binary if we're in source, download or dump mode and this is + * not a VM/CMS server, so we don't get CRLF instead of LF (or CR) for + * newlines in text files. Can't do this for VM/CMS or we'll get raw + * EBCDIC. - FM + */ + (format_out == WWW_SOURCE || + format_out == HTAtom_for("www/download") || + format_out == HTAtom_for("www/dump")) && + (server_type != CMS_SERVER)) + binary = TRUE; + if (!binary && type && *type == 'I') { + /* + * Force binary if we had ;type=I - FM + */ + binary = TRUE; + } else if (binary && type && *type == 'A') { + /* + * Force ASCII if we had ;type=A - FM + */ + binary = FALSE; + } + if (binary != control->binary) { + /* + * Act on our setting if not already set. - FM + */ + const char *mode = binary ? "I" : "A"; + + status = send_cmd_2("TYPE", mode); + if (status != 2) { + init_help_message_cache(); /* to free memory */ + return ((status < 0) ? status : -status); + } + control->binary = binary; + } + switch (server_type) { + /* + * Handle what for Lynx are special case servers, e.g., for which + * we respect RFC 1738, or which have known conflicts in suffix + * mappings. 
- FM + */ + case VMS_SERVER: + { + char *cp1, *cp2; + BOOL included_device = FALSE; + BOOL found_tilde = FALSE; + + /* Accept only Unix-style filename */ + if (strchr(filename, ':') != NULL || + strchr(filename, '[') != NULL) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + CTRACE((tfp, + "HTFTP: Rejecting path due to non-Unix-style syntax.\n")); + return -1; + } + /* Handle any unescaped "/%2F" path */ + if (!StrNCmp(filename, "//", 2)) { + int i; + + included_device = TRUE; + for (i = 0; filename[(i + 1)]; i++) + filename[i] = filename[(i + 1)]; + filename[i] = '\0'; + CTRACE((tfp, "HTFTP: Trimmed '%s'\n", filename)); + cp = HTVMS_name("", filename); + CTRACE((tfp, "HTFTP: VMSized '%s'\n", cp)); + if ((cp1 = strrchr(cp, ']')) != NULL) { + strcpy(filename, ++cp1); + CTRACE((tfp, "HTFTP: Filename '%s'\n", filename)); + *cp1 = '\0'; + status = send_cwd(cp); + if (status != 2) { + char *dotslash = 0; + + if ((cp1 = strchr(cp, '[')) != NULL) { + *cp1++ = '\0'; + status = send_cwd(cp); + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + HTSprintf0(&dotslash, "[.%s", cp1); + status = send_cwd(dotslash); + FREE(dotslash); + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + } else { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? 
status : -status); + } + } + } else if ((cp1 = strchr(cp, ':')) != NULL && + strchr(cp, '[') == NULL && + strchr(cp, ']') == NULL) { + cp1++; + if (*cp1 != '\0') { + int cplen = (int) (cp1 - cp); + + strcpy(filename, cp1); + CTRACE((tfp, "HTFTP: Filename '%s'\n", filename)); + HTSprintf0(&vmsname, "%.*s[%s]", cplen, cp, filename); + status = send_cwd(vmsname); + if (status != 2) { + HTSprintf(&vmsname, "%.*s[000000]", cplen, cp); + status = send_cwd(vmsname); + if (status != 2) { + HTSprintf(&vmsname, "%.*s", cplen, cp); + status = send_cwd(vmsname); + if (status != 2) { + FREE(fname); + init_help_message_cache(); + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + } + } else { + HTSprintf0(&vmsname, "000000"); + filename = vmsname; + } + } + } else if (0 == strcmp(cp, (filename + 1))) { + status = send_cwd(cp); + if (status != 2) { + HTSprintf0(&vmsname, "%s:", cp); + status = send_cwd(vmsname); + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + } + HTSprintf0(&vmsname, "000000"); + filename = vmsname; + } + } + /* Trim trailing slash if filename is not the top directory */ + if (strlen(filename) > 1 && filename[strlen(filename) - 1] == '/') + filename[strlen(filename) - 1] = '\0'; + +#ifdef MAINTAIN_CONNECTION /* Don't need this if always new connection - F.M. */ + if (!included_device) { + /* Get the current default VMS device:[directory] */ + status = send_cmd_1("PWD"); + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? 
status : -status); + } + /* Go to the VMS account's top directory */ + if ((cp = strchr(response_text, '[')) != NULL && + (cp1 = strrchr(response_text, ']')) != NULL) { + char *tmp = 0; + unsigned len = 4; + + StrAllocCopy(tmp, cp); + if ((cp2 = strchr(cp, '.')) != NULL && cp2 < cp1) { + len += (cp2 - cp); + } else { + len += (cp1 - cp); + } + tmp[len] = 0; + StrAllocCat(tmp, "]"); + + status = send_cwd(tmp); + FREE(tmp); + + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + } + } +#endif /* MAINTAIN_CONNECTION */ + + /* If we want the VMS account's top directory, list it now */ + if (!(strcmp(filename, "/~")) || + (included_device && 0 == strcmp(filename, "000000")) || + (strlen(filename) == 1 && *filename == '/')) { + isDirectory = YES; + status = send_cmd_1("LIST"); + FREE(fname); + if (status != 1) { + /* Action not started */ + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + /* Big goto! */ + goto listen; + } + /* Otherwise, go to appropriate directory and doctor filename */ + if (!StrNCmp(filename, "/~", 2)) { + filename += 2; + found_tilde = TRUE; + } + CTRACE((tfp, "check '%s' to translate x/y/ to [.x.y]\n", filename)); + if (!included_device && + (cp = strchr(filename, '/')) != NULL && + (cp1 = strrchr(cp, '/')) != NULL && + (cp1 - cp) > 1) { + char *tmp = 0; + + HTSprintf0(&tmp, "[.%.*s]", (int) (cp1 - cp - 1), cp + 1); + + CTRACE((tfp, "change path '%s'\n", tmp)); + while ((cp2 = strrchr(tmp, '/')) != NULL) + *cp2 = '.'; + CTRACE((tfp, "...to path '%s'\n", tmp)); + + status = send_cwd(tmp); + FREE(tmp); + + if (status != 2) { + FREE(fname); + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? 
status : -status); + } + filename = cp1 + 1; + } else { + if (!included_device && !found_tilde) { + filename += 1; + } + } + break; + } + case CMS_SERVER: + { + /* + * If we want the CMS account's top directory, or a base SFS or + * anonymous directory path (i.e., without a slash), list it + * now. FM + */ + if ((strlen(filename) == 1 && *filename == '/') || + ((0 == strncasecomp((filename + 1), "vmsysu:", 7)) && + (cp = strchr((filename + 1), '.')) != NULL && + strchr(cp, '/') == NULL) || + (0 == strncasecomp(filename + 1, "anonymou.", 9) && + strchr(filename + 1, '/') == NULL)) { + if (filename[1] != '\0') { + status = send_cwd(filename + 1); + if (status != 2) { + /* Action not started */ + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + } + isDirectory = YES; + if (use_list) + status = send_cmd_1("LIST"); + else + status = send_cmd_1("NLST"); + FREE(fname); + if (status != 1) { + /* Action not started */ + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + /* Big goto! */ + goto listen; + } + filename++; + + /* Otherwise, go to appropriate directory and adjust filename */ + while ((cp = strchr(filename, '/')) != NULL) { + *cp++ = '\0'; + status = send_cwd(filename); + if (status == 2) { + if (*cp == '\0') { + isDirectory = YES; + if (use_list) + status = send_cmd_1("LIST"); + else + status = send_cmd_1("NLST"); + FREE(fname); + if (status != 1) { + /* Action not started */ + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + return ((status < 0) ? status : -status); + } + /* Clear any messages from the login directory */ + init_help_message_cache(); + /* Big goto! 
*/ + goto listen; + } + filename = cp; + } + } + break; + } + default: + /* Shift for any unescaped "/%2F" path */ + if (!StrNCmp(filename, "//", 2)) + filename++; + break; + } + /* + * Act on a file or listing request, or try to figure out which we're + * dealing with if we don't know yet. - FM + */ + if (!(type) || (type && *type != 'D')) { + status = send_cmd_2("RETR", filename); + if (status >= 5) { + int check; + + if (Broken_RETR) { + CTRACE((tfp, "{{reconnecting...\n")); + close_connection(control); + check = setup_connection(name, anchor); + CTRACE((tfp, "...done }}reconnecting\n")); + if (check < 0) + return check; + } + } + } else { + status = 5; /* Failed status set as flag. - FM */ + } + if (status != 1) { /* Failed : try to CWD to it */ + /* Clear any login messages if this isn't the login directory */ + if (strcmp(filename, "/")) + init_help_message_cache(); + + status = send_cwd(filename); + if (status == 2) { /* Succeeded : let's NAME LIST it */ + isDirectory = YES; + if (use_list) + status = send_cmd_1("LIST"); + else + status = send_cmd_1("NLST"); + } + } + FREE(fname); + FREE(vmsname); + if (status != 1) { + init_help_message_cache(); /* to free memory */ + NETCLOSE(control->socket); + control->socket = -1; + if (status < 0) + return status; + else + return -status; + } + } + + listen: + if (!ftp_local_passive) { + /* Wait for the connection */ +#ifdef INET6 + struct sockaddr_storage soc_address; + +#else + struct sockaddr_in soc_address; +#endif /* INET6 */ + LY_SOCKLEN soc_addrlen = (LY_SOCKLEN) sizeof(soc_address); + +#ifdef SOCKS + if (socks_flag) + status = Raccept((int) master_socket, + (struct sockaddr *) &soc_address, + &soc_addrlen); + else +#endif /* SOCKS */ + status = accept((int) master_socket, + (struct sockaddr *) &soc_address, + &soc_addrlen); + if (status < 0) { + init_help_message_cache(); /* to free memory */ + return HTInetStatus("accept"); + } + CTRACE((tfp, "TCP: Accepted new socket %d\n", status)); + data_soc = status; + } 
+ /* !ftp_local_passive */ +#if 0 /* no - this makes the data connection go away too soon (2.8.3dev.22) */ + if ((status = send_cmd_nowait("QUIT")) == 1) + outstanding++; +#endif + + if (isDirectory) { + if (server_type == UNIX_SERVER && !unsure_type && + !strcmp(response_text, + "150 Opening ASCII mode data connection for /bin/dl.\n")) { + CTRACE((tfp, "HTFTP: Treating as \"dls\" server.\n")); + server_type = DLS_SERVER; + } + final_status = read_directory(anchor, name, format_out, sink); + if (final_status > 0) { + if (server_type != CMS_SERVER) + if (outstanding-- > 0) { + status = response(0); + if (status < 0 || + (status == 2 && !StrNCmp(response_text, "221", 3))) + outstanding = 0; + } + } else { /* HT_INTERRUPTED */ + /* User may have pressed 'z' to give up because no + packets got through, so let's not make them wait + any longer - kw */ + outstanding = 0; + } + + if (data_soc != -1) { /* normally done in read_directory */ + CTRACE((tfp, "HTFTP: Closing data socket %d\n", data_soc)); + status = NETCLOSE(data_soc); + if (status == -1) + HTInetStatus("close"); /* Comment only */ + } + status = final_status; + } else { + int rv; + char *FileName = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); + + /* Clear any login messages */ + init_help_message_cache(); + + /* Fake a Content-Encoding for compressed files. - FM */ + HTUnEscape(FileName); + if (!IsUnityEnc(encoding)) { + /* + * We already know from the call to HTFileFormat above that this is + * a compressed file, no need to look at the filename again. 
- kw + */ + StrAllocCopy(anchor->content_type, format->name); + StrAllocCopy(anchor->content_encoding, HTAtom_name(encoding)); + format = HTAtom_for("www/compressed"); + + } else { + int rootlen; + CompressFileType cft = HTCompressFileType(FileName, "._-", &rootlen); + + if (cft != cftNone) { + FileName[rootlen] = '\0'; + format = HTFileFormat(FileName, &encoding, NULL); + format = HTCharsetFormat(format, anchor, -1); + StrAllocCopy(anchor->content_type, format->name); + format = HTAtom_for("www/compressed"); + + switch (cft) { + case cftCompress: + StrAllocCopy(anchor->content_encoding, "x-compress"); + break; + case cftGzip: + StrAllocCopy(anchor->content_encoding, "x-gzip"); + break; + case cftDeflate: + StrAllocCopy(anchor->content_encoding, "x-deflate"); + break; + case cftBzip2: + StrAllocCopy(anchor->content_encoding, "x-bzip2"); + break; + case cftNone: + break; + } + } + } + FREE(FileName); + + _HTProgress(gettext("Receiving FTP file.")); + rv = HTParseSocket(format, format_out, anchor, data_soc, sink); + + HTInitInput(control->socket); + /* Reset buffering to control connection DD 921208 */ + + if (rv < 0) { +#if 0 /* any known servers where ABOR would work this way? 
*/ + if (rv == HT_INTERRUPTED || rv == -501) + if (send_cmd_nowait("ABOR") == 1) { + outstanding++; + CTRACE((tfp, "HTFTP: outstanding responses: %d\n", outstanding)); + } +#endif + if (rv == -2) /* weird error, don't expect much response */ + outstanding--; + else if (rv == HT_INTERRUPTED || rv == -1) + /* User may have pressed 'z' to give up because no + packets got through, so let's not make them wait + longer - kw */ + outstanding = 0; + CTRACE((tfp, "HTFTP: Closing data socket %d\n", data_soc)); + status = NETCLOSE(data_soc); + } else + status = 2; /* data_soc already closed in HTCopy - kw */ + + if (status < 0 && rv != HT_INTERRUPTED && rv != -1) { + (void) HTInetStatus("close"); /* Comment only */ + } else { + if (rv != HT_LOADED && outstanding--) { + status = response(0); /* Pick up final reply */ + if (status != 2 && rv != HT_INTERRUPTED && rv != -1) { + data_soc = -1; /* invalidate it */ + init_help_message_cache(); /* to free memory */ + return HTLoadError(sink, 500, response_text); + } else if (status <= 0) { + outstanding = 0; + } else if (status == 2 && !StrNCmp(response_text, "221", 3)) + outstanding = 0; + } + } + final_status = HT_LOADED; + } + while (outstanding-- > 0 && + (status > 0)) { + status = response(0); + if (status == 2 && !StrNCmp(response_text, "221", 3)) + break; + } + data_soc = -1; /* invalidate it */ + CTRACE((tfp, "HTFTPLoad: normal end; ")); + if (control->socket < 0) { + CTRACE((tfp, "control socket is %d\n", control->socket)); + } else { + CTRACE((tfp, "closing control socket %d\n", control->socket)); + status = NETCLOSE(control->socket); + if (status == -1) + HTInetStatus("control connection close"); /* Comment only */ + } + control->socket = -1; + init_help_message_cache(); /* to free memory */ + /* returns HT_LOADED (always for file if we get here) or error */ + return final_status; +} /* open_file_read */ + +/* + * This function frees any user entered password, so that + * it must be entered again for a future request. 
- FM + */ +void HTClearFTPPassword(void) +{ + /* + * Need code to check cached documents from non-anonymous ftp accounts and + * do something to ensure that they no longer can be accessed without a new + * retrieval. - FM + */ + + /* + * Now free the current user entered password, if any. - FM + */ + FREE(user_entered_password); +} + +#endif /* ifndef DISABLE_FTP */ diff --git a/WWW/Library/Implementation/HTFTP.h b/WWW/Library/Implementation/HTFTP.h new file mode 100644 index 00000000..a903bbb8 --- /dev/null +++ b/WWW/Library/Implementation/HTFTP.h @@ -0,0 +1,70 @@ +/* FTP access module for libwww + FTP ACCESS FUNCTIONS + + This isn't really a valid protocol module -- it is lumped together with HTFile . That + could be changed easily. + + Author: Tim Berners-Lee. Public Domain. Please mail changes to timbl@info.cern.ch + + */ +#ifndef HTFTP_H +#define HTFTP_H + +#include <HTAnchor.h> +#include <HTStream.h> +#include <HTParse.h> + +#ifdef __cplusplus +extern "C" { +#endif +#define FILE_BY_NAME 0 +#define FILE_BY_TYPE 1 +#define FILE_BY_SIZE 2 +#define FILE_BY_DATE 3 + extern int HTfileSortMethod; /* specifies the method of sorting */ + +/* PUBLIC HTVMS_name() + * CONVERTS WWW name into a VMS name + * ON ENTRY: + * nn Node Name (optional) + * fn WWW file name + * + * ON EXIT: + * returns vms file specification + * + * Bug: Returns pointer to static -- non-reentrant + */ + extern char *HTVMS_name(const char *nn, + const char *fn); + +/* + +Retrieve File from Server + + ON EXIT, + + returns Socket number for file if good.<0 if bad. + + */ + extern int HTFTPLoad(const char *name, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink); + +/* + * This function frees any user entered password, so that + * it must be entered again for a future request. 
- FM + */ + extern void HTClearFTPPassword(void); + +/* + +Return Host Name + + */ + extern const char *HTHostName(void); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/WWW/Library/Implementation/HTFWriter.c b/WWW/Library/Implementation/HTFWriter.c new file mode 100644 index 00000000..90ab003e --- /dev/null +++ b/WWW/Library/Implementation/HTFWriter.c @@ -0,0 +1,361 @@ +/* FILE WRITER HTFWrite.h + * =========== + * + * This version of the stream object just writes to a C file. + * The file is assumed open and left open. + * + * Bugs: + * strings written must be less than buffer size. + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> + +#include <HTFWriter.h> + +#include <HTFormat.h> +#include <HTAlert.h> +#include <HTFile.h> + +#include <LYUtils.h> +#include <LYLeaks.h> + +/* Stream Object + * ------------ + */ + +struct _HTStream { + const HTStreamClass *isa; + + FILE *fp; + char *end_command; + char *remove_command; + BOOL announce; +}; + +/*_________________________________________________________________________ + * + * B L A C K H O L E C L A S S + * + * There is only one black hole instance shared by anyone + * who wanst a black hole. These black holes don't radiate, + * they just absorb data. 
+ */ +static void HTBlackHole_put_character(HTStream *me, char c) +{ +} +static void HTBlackHole_put_string(HTStream *me, const char *s) +{ +} +static void HTBlackHole_write(HTStream *me, const char *s, int l) +{ +} +static void HTBlackHole_free(HTStream *me) +{ +} +static void HTBlackHole_abort(HTStream *me, HTError e) +{ +} + +/* Black Hole stream + * ----------------- + */ +static const HTStreamClass HTBlackHoleClass = +{ + "BlackHole", + HTBlackHole_free, + HTBlackHole_abort, + HTBlackHole_put_character, HTBlackHole_put_string, + HTBlackHole_write +}; + +static HTStream HTBlackHoleInstance = +{ + &HTBlackHoleClass, + NULL, + NULL, + NULL, + NO +}; + +/* Black hole craetion +*/ +HTStream *HTBlackHole(void) +{ + return &HTBlackHoleInstance; +} + +/*_________________________________________________________________________ + * + * F I L E A C T I O N R O U T I N E S + * Bug: + * All errors are ignored. + */ + +/* Character handling + * ------------------ + */ + +static void HTFWriter_put_character(HTStream *me, char c) +{ + putc(c, me->fp); +} + +/* String handling + * --------------- + * + * Strings must be smaller than this buffer size. + */ +static void HTFWriter_put_string(HTStream *me, const char *s) +{ + fputs(s, me->fp); +} + +/* Buffer write. Buffers can (and should!) be big. + * ------------ + */ +static void HTFWriter_write(HTStream *me, const char *s, int l) +{ + fwrite(s, 1, l, me->fp); +} + +/* Free an HTML object + * ------------------- + * + * Note that the SGML parsing context is freed, but the created + * object is not, + * as it takes on an existence of its own unless explicitly freed. 
+ */ +static void HTFWriter_free(HTStream *me) +{ + fclose(me->fp); + if (me->end_command) { /* Temp file */ + _HTProgress(me->end_command); /* Tell user what's happening */ + system(me->end_command); + FREE(me->end_command); + if (me->remove_command) { + system(me->remove_command); + FREE(me->remove_command); + } + } + + FREE(me); +} + +/* End writing +*/ + +static void HTFWriter_abort(HTStream *me, HTError e) +{ + fclose(me->fp); + if (me->end_command) { /* Temp file */ + CTRACE((tfp, "HTFWriter: Aborting: file not executed.\n")); + FREE(me->end_command); + if (me->remove_command) { + system(me->remove_command); + FREE(me->remove_command); + } + } + + FREE(me); +} + +/* Structured Object Class + * ----------------------- + */ +static const HTStreamClass HTFWriter = /* As opposed to print etc */ +{ + "FileWriter", + HTFWriter_free, + HTFWriter_abort, + HTFWriter_put_character, HTFWriter_put_string, + HTFWriter_write +}; + +/* Subclass-specific Methods + * ------------------------- + */ + +HTStream *HTFWriter_new(FILE *fp) +{ + HTStream *me; + + if (!fp) + return NULL; + + me = (HTStream *) malloc(sizeof(*me)); + if (me == NULL) + outofmem(__FILE__, "HTML_new"); + + assert(me != NULL); + + me->isa = &HTFWriter; + + me->fp = fp; + me->end_command = NULL; + me->remove_command = NULL; + me->announce = NO; + + return me; +} + +/* Make system command from template + * --------------------------------- + * + * See mailcap spec for description of template. + */ +/* @@ to be written. sprintfs will do for now. */ + +/* Take action using a system command + * ---------------------------------- + * + * originally from Ghostview handling by Marc Andreseen. + * Creates temporary file, writes to it, executes system command + * on end-document. The suffix of the temp file can be given + * in case the application is fussy, or so that a generic opener can + * be used. 
+ */ +HTStream *HTSaveAndExecute(HTPresentation *pres, + HTParentAnchor *anchor, /* Not used */ + HTStream *sink) /* Not used */ + +#ifdef UNIX +#define REMOVE_COMMAND "/bin/rm -f %s\n" +#endif +#ifdef VMS +#define REMOVE_COMMAND "delete/noconfirm/nolog %s.." +#endif + +#ifdef REMOVE_COMMAND +{ + char *fnam; + const char *suffix; + + HTStream *me; + + if (HTClientHost) { + HTAlert(CANNOT_SAVE_REMOTE); + return HTBlackHole(); + } + + me = (HTStream *) malloc(sizeof(*me)); + if (me == NULL) + outofmem(__FILE__, "Save and execute"); + + assert(me != NULL); + + me->isa = &HTFWriter; + + /* Save the file under a suitably suffixed name */ + + suffix = HTFileSuffix(pres->rep, anchor->content_encoding); + + fnam = (char *) malloc(L_tmpnam + 16 + strlen(suffix)); + if (fnam == NULL) + outofmem(__FILE__, "HTSaveAndExecute"); + + assert(fnam != NULL); + + tmpnam(fnam); + strcat(fnam, suffix); + + me->fp = fopen(fnam, BIN_W); + if (!me->fp) { + HTAlert(CANNOT_OPEN_TEMP); + FREE(fnam); + FREE(me); + return NULL; + } + +/* Make command to process file +*/ + me->end_command = 0; + HTSprintf0(&(me->end_command), pres->command, fnam, fnam, fnam); + + me->remove_command = NULL; /* If needed, put into end_command */ +#ifdef NOPE +/* Make command to delete file +*/ + me->remove_command = 0; + HTSprintf0(&(me->remove_command), REMOVE_COMMAND, fnam); +#endif + + me->announce = NO; + FREE(fnam); + return me; +} + +#else /* can do remove */ +{ + return NULL; +} +#endif + +/* Save Locally + * ------------ + * + * Bugs: + * GUI Apps should open local Save panel here really. 
+ * + */ +HTStream *HTSaveLocally(HTPresentation *pres, + HTParentAnchor *anchor, /* Not used */ + HTStream *sink) /* Not used */ + +{ + char *fnam; + char *answer; + const char *suffix; + + HTStream *me; + + if (HTClientHost) { + HTAlert(CANNOT_SAVE_REMOTE); + return HTBlackHole(); + } + + me = (HTStream *) malloc(sizeof(*me)); + if (me == NULL) + outofmem(__FILE__, "SaveLocally"); + + assert(me != NULL); + + me->isa = &HTFWriter; + me->end_command = NULL; + me->remove_command = NULL; /* If needed, put into end_command */ + me->announce = YES; + + /* Save the file under a suitably suffixed name */ + + suffix = HTFileSuffix(pres->rep, anchor->content_encoding); + + fnam = (char *) malloc(L_tmpnam + 16 + strlen(suffix)); + if (fnam == NULL) + outofmem(__FILE__, "HTSaveLocally"); + + assert(fnam != NULL); + + tmpnam(fnam); + strcat(fnam, suffix); + + /* Save Panel */ + answer = HTPrompt(GIVE_FILENAME, fnam); + + FREE(fnam); + + me->fp = fopen(answer, BIN_W); + if (!me->fp) { + HTAlert(CANNOT_OPEN_OUTPUT); + FREE(answer); + FREE(me); + return NULL; + } + + FREE(answer); + return me; +} + +/* Format Converter using system command + * ------------------------------------- + */ diff --git a/WWW/Library/Implementation/HTFWriter.h b/WWW/Library/Implementation/HTFWriter.h new file mode 100644 index 00000000..015ea15f --- /dev/null +++ b/WWW/Library/Implementation/HTFWriter.h @@ -0,0 +1,30 @@ +/* File Writer for libwww + C FILE WRITER + + It is useful to have both FWriter and Writer for environments in which fdopen() doesn't + exist for example. 
+ + */ +#ifndef HTFWRITE_H +#define HTFWRITE_H + +#include <HTStream.h> +#include <HTFormat.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern HTStream *HTFWriter_new(FILE *fp); + + extern HTStream *HTSaveAndExecute(HTPresentation *pres, + HTParentAnchor *anchor, /* Not used */ + HTStream *sink); + + extern HTStream *HTSaveLocally(HTPresentation *pres, + HTParentAnchor *anchor, /* Not used */ + HTStream *sink); + +#ifdef __cplusplus +} +#endif +#endif /* HTFWRITE_H */ diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c new file mode 100644 index 00000000..be561e44 --- /dev/null +++ b/WWW/Library/Implementation/HTFile.c @@ -0,0 +1,3240 @@ +/* + * $LynxId: HTFile.c,v 1.129 2012/02/10 01:10:51 tom Exp $ + * + * File Access HTFile.c + * =========== + * + * This is unix-specific code in general, with some VMS bits. + * These are routines for file access used by browsers. + * Development of this module for Unix DIRED_SUPPORT in Lynx + * regrettably has has been conducted in a manner with now + * creates a major impediment for hopes of adapting Lynx to + * a newer version of the library. + * + * History: + * Feb 91 Written Tim Berners-Lee CERN/CN + * Apr 91 vms-vms access included using DECnet syntax + * 26 Jun 92 (JFG) When running over DECnet, suppressed FTP. + * Fixed access bug for relative names on VMS. + * Sep 93 (MD) Access to VMS files allows sharing. + * 15 Nov 93 (MD) Moved HTVMSname to HTVMSUTILS.C + * 27 Dec 93 (FM) FTP now works with VMS hosts. + * FTP path must be Unix-style and cannot include + * the device or top directory. 
+ */ + +#include <HTUtils.h> + +#ifndef VMS +#if defined(DOSPATH) +#undef LONG_LIST +#define LONG_LIST /* Define this for long style unix listings (ls -l), + the actual style is configurable from lynx.cfg */ +#endif +/* #define NO_PARENT_DIR_REFERENCE */ +/* Define this for no parent links */ +#endif /* !VMS */ + +#if defined(DOSPATH) +#define HAVE_READDIR 1 +#define USE_DIRENT +#endif + +#if defined(USE_DOS_DRIVES) +#include <HTDOS.h> +#endif + +#include <HTFile.h> /* Implemented here */ + +#ifdef VMS +#include <stat.h> +#endif /* VMS */ + +#if defined (USE_ZLIB) || defined (USE_BZLIB) +#include <GridText.h> +#endif + +#define MULTI_SUFFIX ".multi" /* Extension for scanning formats */ + +#include <HTParse.h> +#include <HTTCP.h> +#ifndef DECNET +#include <HTFTP.h> +#endif /* !DECNET */ +#include <HTAnchor.h> +#include <HTAtom.h> +#include <HTAAProt.h> +#include <HTFWriter.h> +#include <HTInit.h> +#include <HTBTree.h> +#include <HTAlert.h> +#include <HTCJK.h> +#include <UCDefs.h> +#include <UCMap.h> +#include <UCAux.h> + +#include <LYexit.h> +#include <LYCharSets.h> +#include <LYGlobalDefs.h> +#include <LYStrings.h> +#include <LYUtils.h> + +#ifdef USE_PRETTYSRC +# include <LYPrettySrc.h> +#endif + +#include <LYLeaks.h> + +typedef struct _HTSuffix { + char *suffix; + HTAtom *rep; + HTAtom *encoding; + char *desc; + float quality; +} HTSuffix; + +typedef struct { + struct stat file_info; + char sort_tags; + char file_name[1]; /* on the end of the struct, since its length varies */ +} DIRED; + +#ifndef NGROUPS +#ifdef NGROUPS_MAX +#define NGROUPS NGROUPS_MAX +#else +#define NGROUPS 32 +#endif /* NGROUPS_MAX */ +#endif /* NGROUPS */ + +#ifndef GETGROUPS_T +#define GETGROUPS_T int +#endif + +#include <HTML.h> /* For directory object building */ + +#define PUTC(c) (*target->isa->put_character)(target, c) +#define PUTS(s) (*target->isa->put_string)(target, s) +#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0) +#define END(e) 
(*target->isa->end_element)(target, e, 0) +#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ + (*target->isa->end_element)(target, e, 0) +#define FREE_TARGET (*target->isa->_free)(target) +#define ABORT_TARGET (*targetClass._abort)(target, NULL); + +struct _HTStructured { + const HTStructuredClass *isa; + /* ... */ +}; + +/* + * Controlling globals. + */ +int HTDirAccess = HT_DIR_OK; + +#ifdef DIRED_SUPPORT +int HTDirReadme = HT_DIR_README_NONE; + +#else +int HTDirReadme = HT_DIR_README_TOP; +#endif /* DIRED_SUPPORT */ + +static const char *HTMountRoot = "/Net/"; /* Where to find mounts */ + +#ifdef VMS +static const char *HTCacheRoot = "/WWW$SCRATCH"; /* Where to cache things */ + +#else +static const char *HTCacheRoot = "/tmp/W3_Cache_"; /* Where to cache things */ +#endif /* VMS */ + +static char s_no_suffix[] = "*"; +static char s_unknown_suffix[] = "*.*"; + +/* + * Suffix registration. + */ +static HTList *HTSuffixes = 0; + +static HTSuffix no_suffix = +{ + s_no_suffix, NULL, NULL, NULL, 1.0 +}; + +static HTSuffix unknown_suffix = +{ + s_unknown_suffix, NULL, NULL, NULL, 1.0 +}; + +/* To free up the suffixes at program exit. 
+ * ---------------------------------------- + */ +#ifdef LY_FIND_LEAKS +static void free_suffixes(void); +#endif + +#ifdef LONG_LIST +static char *FormatStr(char **bufp, + char *start, + const char *entry) +{ + char fmt[512]; + + if (*start) { + sprintf(fmt, "%%%.*ss", (int) sizeof(fmt) - 3, start); + HTSprintf0(bufp, fmt, entry); + } else if (*bufp && !(entry && *entry)) { + **bufp = '\0'; + } else if (entry) { + StrAllocCopy(*bufp, entry); + } + return *bufp; +} + +static char *FormatNum(char **bufp, + char *start, + int entry) +{ + char fmt[512]; + + if (*start) { + sprintf(fmt, "%%%.*sd", (int) sizeof(fmt) - 3, start); + HTSprintf0(bufp, fmt, entry); + } else { + sprintf(fmt, "%d", entry); + StrAllocCopy(*bufp, fmt); + } + return *bufp; +} + +static void LYListFmtParse(const char *fmtstr, + DIRED * data, + char *file, + HTStructured * target, + char *tail) +{ + char c; + char *s; + char *end; + char *start; + char *str = NULL; + char *buf = NULL; + char tmp[LY_MAXPATH]; + char type; + +#ifndef NOUSERS + const char *name; +#endif + time_t now; + char *datestr; + +#ifdef S_IFLNK + int len; +#endif +#define SEC_PER_YEAR (60 * 60 * 24 * 365) + +#ifdef _WINDOWS /* 1998/01/06 (Tue) 21:20:53 */ + static const char *pbits[] = + { + "---", "--x", "-w-", "-wx", + "r--", "r-x", "rw-", "rwx", + 0}; + +#define PBIT(a, n, s) pbits[((a) >> (n)) & 0x7] + +#else + static const char *pbits[] = + {"---", "--x", "-w-", "-wx", + "r--", "r-x", "rw-", "rwx", 0}; + static const char *psbits[] = + {"--S", "--s", "-wS", "-ws", + "r-S", "r-s", "rwS", "rws", 0}; + +#define PBIT(a, n, s) (s) ? psbits[((a) >> (n)) & 0x7] : \ + pbits[((a) >> (n)) & 0x7] +#endif +#if defined(S_ISVTX) && !defined(_WINDOWS) + static const char *ptbits[] = + {"--T", "--t", "-wT", "-wt", + "r-T", "r-t", "rwT", "rwt", 0}; + +#define PTBIT(a, s) (s) ? 
ptbits[(a) & 0x7] : pbits[(a) & 0x7] +#else +#define PTBIT(a, s) PBIT(a, 0, 0) +#endif + + if (data->file_info.st_mode == 0) + fmtstr = " %a"; /* can't stat so just do anchor */ + + StrAllocCopy(str, fmtstr); + s = str; + end = str + strlen(str); + while (*s) { + start = s; + while (*s) { + if (*s == '%') { + if (*(s + 1) == '%') /* literal % */ + s++; + else + break; + } + s++; + } + /* s is positioned either at a % or at \0 */ + *s = '\0'; + if (s > start) { /* some literal chars. */ + PUTS(start); + } + if (s == end) + break; + start = ++s; + while (isdigit(UCH(*s)) || *s == '.' || *s == '-' || *s == ' ' || + *s == '#' || *s == '+' || *s == '\'') + s++; + c = *s; /* the format char. or \0 */ + *s = '\0'; + + switch (c) { + case '\0': + PUTS(start); + continue; + + case 'A': + case 'a': /* anchor */ + HTDirEntry(target, tail, data->file_name); + FormatStr(&buf, start, data->file_name); + PUTS(buf); + END(HTML_A); + *buf = '\0'; +#ifdef S_IFLNK + if (c != 'A' && S_ISLNK(data->file_info.st_mode) && + (len = (int) readlink(file, tmp, sizeof(tmp) - 1)) >= 0) { + PUTS(" -> "); + tmp[len] = '\0'; + PUTS(tmp); + } +#endif + break; + + case 'T': /* MIME type */ + case 't': /* MIME type description */ + if (S_ISDIR(data->file_info.st_mode)) { + if (c != 'T') { + FormatStr(&buf, start, ENTRY_IS_DIRECTORY); + } else { + FormatStr(&buf, start, ""); + } + } else { + const char *cp2; + HTFormat format; + + format = HTFileFormat(file, NULL, &cp2); + + if (c != 'T') { + if (cp2 == NULL) { + if (!StrNCmp(HTAtom_name(format), + "application", 11)) { + cp2 = HTAtom_name(format) + 12; + if (!StrNCmp(cp2, "x-", 2)) + cp2 += 2; + } else { + cp2 = HTAtom_name(format); + } + } + FormatStr(&buf, start, cp2); + } else { + FormatStr(&buf, start, HTAtom_name(format)); + } + } + break; + + case 'd': /* date */ + now = time(0); + datestr = ctime(&data->file_info.st_mtime); + if ((now - data->file_info.st_mtime) < SEC_PER_YEAR / 2) + /* + * MMM DD HH:MM + */ + sprintf(tmp, "%.12s", datestr + 
4); + else + /* + * MMM DD YYYY + */ + sprintf(tmp, "%.7s %.4s ", datestr + 4, + datestr + 20); + FormatStr(&buf, start, tmp); + break; + + case 's': /* size in bytes */ + FormatNum(&buf, start, (int) data->file_info.st_size); + break; + + case 'K': /* size in Kilobytes but not for directories */ + if (S_ISDIR(data->file_info.st_mode)) { + FormatStr(&buf, start, ""); + StrAllocCat(buf, " "); + break; + } + /* FALL THROUGH */ + case 'k': /* size in Kilobytes */ + FormatNum(&buf, start, (int) ((data->file_info.st_size + 1023) / 1024)); + StrAllocCat(buf, "K"); + break; + + case 'p': /* unix-style permission bits */ + switch (data->file_info.st_mode & S_IFMT) { +#if defined(_MSC_VER) && defined(_S_IFIFO) + case _S_IFIFO: + type = 'p'; + break; +#else + case S_IFIFO: + type = 'p'; + break; +#endif + case S_IFCHR: + type = 'c'; + break; + case S_IFDIR: + type = 'd'; + break; + case S_IFREG: + type = '-'; + break; +#ifdef S_IFBLK + case S_IFBLK: + type = 'b'; + break; +#endif +#ifdef S_IFLNK + case S_IFLNK: + type = 'l'; + break; +#endif +#ifdef S_IFSOCK +# ifdef S_IFIFO /* some older machines (e.g., apollo) have a conflict */ +# if S_IFIFO != S_IFSOCK + case S_IFSOCK: + type = 's'; + break; +# endif +# else + case S_IFSOCK: + type = 's'; + break; +# endif +#endif /* S_IFSOCK */ + default: + type = '?'; + break; + } +#ifdef _WINDOWS + sprintf(tmp, "%c%s", type, + PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_IRWXU)); +#else + sprintf(tmp, "%c%s%s%s", type, + PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_ISUID), + PBIT(data->file_info.st_mode, 3, data->file_info.st_mode & S_ISGID), + PTBIT(data->file_info.st_mode, data->file_info.st_mode & S_ISVTX)); +#endif + FormatStr(&buf, start, tmp); + break; + + case 'o': /* owner */ +#ifndef NOUSERS + name = HTAA_UidToName((int) data->file_info.st_uid); + if (*name) { + FormatStr(&buf, start, name); + } else { + FormatNum(&buf, start, (int) data->file_info.st_uid); + } +#endif + break; + + case 'g': /* 
group */ +#ifndef NOUSERS + name = HTAA_GidToName((int) data->file_info.st_gid); + if (*name) { + FormatStr(&buf, start, name); + } else { + FormatNum(&buf, start, (int) data->file_info.st_gid); + } +#endif + break; + + case 'l': /* link count */ + FormatNum(&buf, start, (int) data->file_info.st_nlink); + break; + + case '%': /* literal % with flags/width */ + FormatStr(&buf, start, "%"); + break; + + default: + fprintf(stderr, + "Unknown format character `%c' in list format\n", c); + break; + } + if (buf) + PUTS(buf); + + s++; + } + FREE(buf); + PUTC('\n'); + FREE(str); +} +#endif /* LONG_LIST */ + +/* Define the representation associated with a file suffix. + * -------------------------------------------------------- + * + * Calling this with suffix set to "*" will set the default + * representation. + * Calling this with suffix set to "*.*" will set the default + * representation for unknown suffix files which contain a ".". + * + * The encoding parameter can give a trivial (8bit, 7bit, binary) + * or real (gzip, compress) encoding. + * + * If filename suffix is already defined with the same encoding + * its previous definition is overridden. 
+ */ +void HTSetSuffix5(const char *suffix, + const char *representation, + const char *encoding, + const char *desc, + double value) +{ + HTSuffix *suff; + BOOL trivial_enc = (BOOL) IsUnityEncStr(encoding); + + if (strcmp(suffix, s_no_suffix) == 0) + suff = &no_suffix; + else if (strcmp(suffix, s_unknown_suffix) == 0) + suff = &unknown_suffix; + else { + HTList *cur = HTSuffixes; + + while (NULL != (suff = (HTSuffix *) HTList_nextObject(cur))) { + if (suff->suffix && 0 == strcmp(suff->suffix, suffix) && + ((trivial_enc && IsUnityEnc(suff->encoding)) || + (!trivial_enc && !IsUnityEnc(suff->encoding) && + strcmp(encoding, HTAtom_name(suff->encoding)) == 0))) + break; + } + if (!suff) { /* Not found -- create a new node */ + suff = typecalloc(HTSuffix); + if (suff == NULL) + outofmem(__FILE__, "HTSetSuffix"); + + assert(suff != NULL); + + if (!HTSuffixes) { + HTSuffixes = HTList_new(); +#ifdef LY_FIND_LEAKS + atexit(free_suffixes); +#endif + } + + HTList_addObject(HTSuffixes, suff); + + StrAllocCopy(suff->suffix, suffix); + } + } + + if (representation) + suff->rep = HTAtom_for(representation); + + /* + * Memory leak fixed. + * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + * Invariant code removed. + */ + suff->encoding = HTAtom_for(encoding); + + StrAllocCopy(suff->desc, desc); + + suff->quality = (float) value; +} + +#ifdef LY_FIND_LEAKS +/* + * Purpose: Free all added suffixes. + * Arguments: void + * Return Value: void + * Remarks/Portability/Dependencies/Restrictions: + * To be used at program exit. + * Revision History: + * 05-28-94 created Lynx 2-3-1 Garrett Arch Blythe + */ +static void free_suffixes(void) +{ + HTSuffix *suff = NULL; + + /* + * Loop through all suffixes. + */ + while (!HTList_isEmpty(HTSuffixes)) { + /* + * Free off each item and its members if need be. + */ + suff = (HTSuffix *) HTList_removeLastObject(HTSuffixes); + FREE(suff->suffix); + FREE(suff->desc); + FREE(suff); + } + /* + * Free off the list itself. 
+ */ + HTList_delete(HTSuffixes); + HTSuffixes = NULL; +} +#endif /* LY_FIND_LEAKS */ + +/* Make the cache file name for a W3 document. + * ------------------------------------------- + * Make up a suitable name for saving the node in + * + * E.g. /tmp/WWW_Cache_news/1234@cernvax.cern.ch + * /tmp/WWW_Cache_http/crnvmc/FIND/xx.xxx.xx + * + * On exit: + * Returns a malloc'ed string which must be freed by the caller. + */ +char *HTCacheFileName(const char *name) +{ + char *acc_method = HTParse(name, "", PARSE_ACCESS); + char *host = HTParse(name, "", PARSE_HOST); + char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); + char *result = NULL; + + HTSprintf0(&result, "%s/WWW/%s/%s%s", HTCacheRoot, acc_method, host, path); + + FREE(path); + FREE(acc_method); + FREE(host); + return result; +} + +/* Open a file for write, creating the path. + * ----------------------------------------- + */ +#ifdef NOT_IMPLEMENTED +static int HTCreatePath(const char *path) +{ + return -1; +} +#endif /* NOT_IMPLEMENTED */ + +/* Convert filename from URL-path syntax to local path format + * ---------------------------------------------------------- + * Input name is assumed to be the URL-path of a local file + * URL, i.e. what comes after the "file://localhost". + * '#'-fragments to be treated as such must already be stripped. + * If expand_all is FALSE, unescape only escaped '/'. - kw + * + * On exit: + * Returns a malloc'ed string which must be freed by the caller. + */ +char *HTURLPath_toFile(const char *name, + int expand_all, + int is_remote GCC_UNUSED) +{ + char *path = NULL; + char *result = NULL; + + StrAllocCopy(path, name); + if (expand_all) + HTUnEscape(path); /* Interpret all % signs */ + else + HTUnEscapeSome(path, "/"); /* Interpret % signs for path delims */ + + CTRACE((tfp, "URLPath `%s' means path `%s'\n", name, path)); +#if defined(USE_DOS_DRIVES) + StrAllocCopy(result, is_remote ? 
path : HTDOS_name(path)); +#else + StrAllocCopy(result, path); +#endif + + FREE(path); + + return result; +} +/* Convert filenames between local and WWW formats. + * ------------------------------------------------ + * Make up a suitable name for saving the node in + * + * E.g. $(HOME)/WWW/news/1234@cernvax.cern.ch + * $(HOME)/WWW/http/crnvmc/FIND/xx.xxx.xx + * + * On exit: + * Returns a malloc'ed string which must be freed by the caller. + */ +/* NOTE: Don't use this function if you know that the input is a URL path + rather than a full URL, use HTURLPath_toFile instead. Otherwise + this function will return the wrong thing for some unusual + paths (like ones containing "//", possibly escaped). - kw +*/ +char *HTnameOfFile_WWW(const char *name, + int WWW_prefix, + int expand_all) +{ + char *acc_method = HTParse(name, "", PARSE_ACCESS); + char *host = HTParse(name, "", PARSE_HOST); + char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION); + const char *home; + char *result = NULL; + + if (expand_all) { + HTUnEscape(path); /* Interpret all % signs */ + } else + HTUnEscapeSome(path, "/"); /* Interpret % signs for path delims */ + + if (0 == strcmp(acc_method, "file") /* local file */ + ||!*acc_method) { /* implicitly local? 
*/ + if ((0 == strcasecomp(host, HTHostName())) || + (0 == strcasecomp(host, "localhost")) || !*host) { + CTRACE((tfp, "Node `%s' means path `%s'\n", name, path)); + StrAllocCopy(result, HTSYS_name(path)); + } else if (WWW_prefix) { + HTSprintf0(&result, "%s%s%s", "/Net/", host, path); + CTRACE((tfp, "Node `%s' means file `%s'\n", name, result)); + } else { + StrAllocCopy(result, path); + } + } else if (WWW_prefix) { /* other access */ +#ifdef VMS + if ((home = LYGetEnv("HOME")) == NULL) + home = HTCacheRoot; + else + home = HTVMS_wwwName(home); +#else +#if defined(_WINDOWS) /* 1997/10/16 (Thu) 20:42:51 */ + home = Home_Dir(); +#else + home = LYGetEnv("HOME"); +#endif + if (home == NULL) + home = "/tmp"; +#endif /* VMS */ + HTSprintf0(&result, "%s/WWW/%s/%s%s", home, acc_method, host, path); + } else { + StrAllocCopy(result, path); + } + + FREE(host); + FREE(path); + FREE(acc_method); + + CTRACE((tfp, "HTnameOfFile_WWW(%s,%d,%d) = %s\n", + name, WWW_prefix, expand_all, result)); + + return result; +} + +/* Make a WWW name from a full local path name. + * -------------------------------------------- + * + * Bugs: + * At present, only the names of two network root nodes are hand-coded + * in and valid for the NeXT only. This should be configurable in + * the general case. + */ +char *WWW_nameOfFile(const char *name) +{ + char *result = NULL; + +#ifdef NeXT + if (0 == StrNCmp("/private/Net/", name, 13)) { + HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 13); + } else +#endif /* NeXT */ + if (0 == StrNCmp(HTMountRoot, name, 5)) { + HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 5); + } else { + HTSprintf0(&result, "%s//%s%s", STR_FILE_URL, HTHostName(), name); + } + CTRACE((tfp, "File `%s'\n\tmeans node `%s'\n", name, result)); + return result; +} + +/* Determine a suitable suffix, given the representation. 
+ * ------------------------------------------------------ + * + * On entry, + * rep is the atomized MIME style representation + * enc is an encoding, trivial (8bit, binary, etc.) or gzip etc. + * + * On exit: + * Returns a pointer to a suitable suffix string if one has been + * found, else "". + */ +const char *HTFileSuffix(HTAtom *rep, + const char *enc) +{ + HTSuffix *suff; + +#ifdef FNAMES_8_3 + HTSuffix *first_found = NULL; +#endif + BOOL trivial_enc; + int n; + int i; + +#define NO_INIT /* don't init anymore since I do it in Lynx at startup */ +#ifndef NO_INIT + if (!HTSuffixes) + HTFileInit(); +#endif /* !NO_INIT */ + + trivial_enc = (BOOL) IsUnityEncStr(enc); + n = HTList_count(HTSuffixes); + for (i = 0; i < n; i++) { + suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i); + if (suff->rep == rep && +#if defined(VMS) || defined(FNAMES_8_3) + /* Don't return a suffix whose first char is a dot, and which + has more dots or asterisks after that, for + these systems - kw */ + (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' || + (strchr(suff->suffix + 1, '.') == NULL && + strchr(suff->suffix + 1, '*') == NULL)) && +#endif + ((trivial_enc && IsUnityEnc(suff->encoding)) || + (!trivial_enc && !IsUnityEnc(suff->encoding) && + strcmp(enc, HTAtom_name(suff->encoding)) == 0))) { +#ifdef FNAMES_8_3 + if (suff->suffix && (strlen(suff->suffix) <= 4)) { + /* + * If length of suffix (including dot) is 4 or smaller, return + * this one even if we found a longer one earlier - kw + */ + return suff->suffix; + } else if (!first_found) { + first_found = suff; /* remember this one */ + } +#else + return suff->suffix; /* OK -- found */ +#endif + } + } +#ifdef FNAMES_8_3 + if (first_found) + return first_found->suffix; +#endif + return ""; /* Dunno */ +} + +/* + * Trim version from VMS filenames to avoid confusing comparisons. 
+ */ +#ifdef VMS +static const char *VMS_trim_version(const char *filename) +{ + const char *result = filename; + const char *version = strchr(filename, ';'); + + if (version != 0) { + static char *stripped; + + StrAllocCopy(stripped, filename); + stripped[version - filename] = '\0'; + result = (const char *) stripped; + } + return result; +} +#define VMS_DEL_VERSION(name) name = VMS_trim_version(name) +#else +#define VMS_DEL_VERSION(name) /* nothing */ +#endif + +/* Determine file format from file name. + * ------------------------------------- + * + * This version will return the representation and also set + * a variable for the encoding. + * + * Encoding may be a unity encoding (binary, 8bit, etc.) or + * a content-coding like gzip, compress. + * + * It will handle for example x.txt, x.txt,Z, x.Z + */ +HTFormat HTFileFormat(const char *filename, + HTAtom **pencoding, + const char **pdesc) +{ + HTSuffix *suff; + int n; + int i; + int lf; + + VMS_DEL_VERSION(filename); + + if (pencoding) + *pencoding = NULL; + if (pdesc) + *pdesc = NULL; + if (LYforce_HTML_mode) { + if (pencoding) + *pencoding = WWW_ENC_8BIT; + return WWW_HTML; + } +#ifndef NO_INIT + if (!HTSuffixes) + HTFileInit(); +#endif /* !NO_INIT */ + lf = (int) strlen(filename); + n = HTList_count(HTSuffixes); + for (i = 0; i < n; i++) { + int ls; + + suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i); + ls = (int) strlen(suff->suffix); + if ((ls <= lf) && 0 == strcasecomp(suff->suffix, filename + lf - ls)) { + int j; + + if (pencoding) + *pencoding = suff->encoding; + if (pdesc) + *pdesc = suff->desc; + if (suff->rep) { + return suff->rep; /* OK -- found */ + } + for (j = 0; j < n; j++) { /* Got encoding, need representation */ + int ls2; + + suff = (HTSuffix *) HTList_objectAt(HTSuffixes, j); + ls2 = (int) strlen(suff->suffix); + if ((ls + ls2 <= lf) && + !strncasecomp(suff->suffix, + filename + lf - ls - ls2, ls2)) { + if (suff->rep) { + if (pdesc && !(*pdesc)) + *pdesc = suff->desc; + if (pencoding && 
IsUnityEnc(*pencoding) && + *pencoding != WWW_ENC_7BIT && + !IsUnityEnc(suff->encoding)) + *pencoding = suff->encoding; + return suff->rep; + } + } + } + + } + } + + /* defaults tree */ + + suff = (strchr(filename, '.') + ? (unknown_suffix.rep + ? &unknown_suffix + : &no_suffix) + : &no_suffix); + + /* + * Set default encoding unless found with suffix already. + */ + if (pencoding && !*pencoding) { + *pencoding = (suff->encoding + ? suff->encoding + : HTAtom_for("binary")); + } + return suff->rep ? suff->rep : WWW_BINARY; +} + +/* Revise the file format in relation to the Lynx charset. - FM + * ------------------------------------------------------- + * + * This checks the format associated with an anchor for + * an extended MIME Content-Type, and if a charset is + * indicated, sets Lynx up for proper handling in relation + * to the currently selected character set. - FM + */ +HTFormat HTCharsetFormat(HTFormat format, + HTParentAnchor *anchor, + int default_LYhndl) +{ + char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4; + BOOL chartrans_ok = FALSE; + int chndl = -1; + + FREE(anchor->charset); + StrAllocCopy(cp, format->name); + LYLowerCase(cp); + if (((cp1 = strchr(cp, ';')) != NULL) && + (cp2 = strstr(cp1, "charset")) != NULL) { + CTRACE((tfp, "HTCharsetFormat: Extended MIME Content-Type is %s\n", + format->name)); + cp2 += 7; + while (*cp2 == ' ' || *cp2 == '=') + cp2++; + StrAllocCopy(cp3, cp2); /* copy to mutilate more */ + for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' && + *cp4 != ';' && *cp4 != ':' && + !WHITE(*cp4)); cp4++) { + ; /* do nothing */ + } + *cp4 = '\0'; + cp4 = cp3; + chndl = UCGetLYhndl_byMIME(cp3); + if (UCCanTranslateFromTo(chndl, current_char_set)) { + chartrans_ok = YES; + *cp1 = '\0'; + format = HTAtom_for(cp); + StrAllocCopy(anchor->charset, cp4); + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_MIME, + UCT_SETBY_MIME); + } else if (chndl < 0) { + /* + * Got something but we don't recognize it. 
+ */ + chndl = UCLYhndl_for_unrec; + if (chndl < 0) + /* + * UCLYhndl_for_unrec not defined :-( fallback to + * UCLYhndl_for_unspec which always valid. + */ + chndl = UCLYhndl_for_unspec; /* always >= 0 */ + if (UCCanTranslateFromTo(chndl, current_char_set)) { + chartrans_ok = YES; + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + } + } + if (chartrans_ok) { + LYUCcharset *p_in = HTAnchor_getUCInfoStage(anchor, + UCT_STAGE_MIME); + LYUCcharset *p_out = HTAnchor_setUCInfoStage(anchor, + current_char_set, + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + + if (!p_out) { + /* + * Try again. + */ + p_out = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT); + } + if (!strcmp(p_in->MIMEname, "x-transparent")) { + HTPassEightBitRaw = TRUE; + HTAnchor_setUCInfoStage(anchor, + HTAnchor_getUCLYhndl(anchor, + UCT_STAGE_HTEXT), + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + } + if (!strcmp(p_out->MIMEname, "x-transparent")) { + HTPassEightBitRaw = TRUE; + HTAnchor_setUCInfoStage(anchor, + HTAnchor_getUCLYhndl(anchor, + UCT_STAGE_MIME), + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + } + if (p_in->enc != UCT_ENC_CJK) { + HTCJK = NOCJK; + if (!(p_in->codepoints & + UCT_CP_SUBSETOF_LAT1) && + chndl == current_char_set) { + HTPassEightBitRaw = TRUE; + } + } else if (p_out->enc == UCT_ENC_CJK) { + Set_HTCJK(p_in->MIMEname, p_out->MIMEname); + } + } else { + /* + * Cannot translate. If according to some heuristic the given + * charset and the current display character both are likely to be + * like ISO-8859 in structure, pretend we have some kind of match. 
+ */ + BOOL given_is_8859 = (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) && + isdigit(UCH(cp4[9]))); + BOOL given_is_8859like = (BOOL) (given_is_8859 || + !StrNCmp(cp4, "windows-", 8) || + !StrNCmp(cp4, "cp12", 4) || + !StrNCmp(cp4, "cp-12", 5)); + BOOL given_and_display_8859like = (BOOL) (given_is_8859like && + (strstr(LYchar_set_names[current_char_set], + "ISO-8859") || + strstr(LYchar_set_names[current_char_set], + "windows-"))); + + if (given_and_display_8859like) { + *cp1 = '\0'; + format = HTAtom_for(cp); + } + if (given_is_8859) { + cp1 = &cp4[10]; + while (*cp1 && + isdigit(UCH(*cp1))) + cp1++; + *cp1 = '\0'; + } + if (given_and_display_8859like) { + StrAllocCopy(anchor->charset, cp4); + HTPassEightBitRaw = TRUE; + } + HTAlert(*cp4 ? cp4 : anchor->charset); + } + FREE(cp3); + } else if (cp1 != NULL) { + /* + * No charset parameter is present. Ignore all other parameters, as we + * do when charset is present. - FM + */ + *cp1 = '\0'; + format = HTAtom_for(cp); + } + FREE(cp); + + /* + * Set up defaults, if needed. - FM + */ + if (!chartrans_ok && !anchor->charset && default_LYhndl >= 0) { + HTAnchor_setUCInfoStage(anchor, default_LYhndl, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + } + HTAnchor_copyUCInfoStage(anchor, + UCT_STAGE_PARSER, + UCT_STAGE_MIME, + -1); + + return format; +} + +/* Get various pieces of meta info from file name. + * ----------------------------------------------- + * + * LYGetFileInfo fills in information that can be determined without + * an actual (new) access to the filesystem, based on current suffix + * and character set configuration. If the file has been loaded and + * parsed before (with the same URL generated here!) and the anchor + * is still around, some results may be influenced by that (in + * particular, charset info from a META tag - this is not actually + * tested!). + * The caller should not keep pointers to the returned objects around + * for too long, the valid lifetimes vary. 
In particular, the returned + * charset string should be copied if necessary. If return of the + * file_anchor is requested, that one can be used to retrieve + * additional bits of info that are stored in the anchor object and + * are not covered here; as usual, don't keep pointers to the + * file_anchor longer than necessary since the object may disappear + * through HTuncache_current_document or at the next document load. + * - kw + */ +void LYGetFileInfo(const char *filename, + HTParentAnchor **pfile_anchor, + HTFormat *pformat, + HTAtom **pencoding, + const char **pdesc, + const char **pcharset, + int *pfile_cs) +{ + char *Afn; + char *Aname = NULL; + HTFormat format; + HTAtom *myEnc = NULL; + HTParentAnchor *file_anchor; + const char *file_csname; + int file_cs; + + /* + * Convert filename to URL. Note that it is always supposed to be a + * filename, not maybe-filename-maybe-URL, so we don't use + * LYFillLocalFileURL and LYEnsureAbsoluteURL. - kw + */ + Afn = HTEscape(filename, URL_PATH); + LYLocalFileToURL(&Aname, Afn); + file_anchor = HTAnchor_findSimpleAddress(Aname); + + file_csname = file_anchor->charset; + format = HTFileFormat(filename, &myEnc, pdesc); + format = HTCharsetFormat(format, file_anchor, UCLYhndl_HTFile_for_unspec); + file_cs = HTAnchor_getUCLYhndl(file_anchor, UCT_STAGE_MIME); + if (!file_csname) { + if (file_cs >= 0) + file_csname = LYCharSet_UC[file_cs].MIMEname; + else + file_csname = "display character set"; + } + CTRACE((tfp, "GetFileInfo: '%s' is a%s %s %s file, charset=%s (%d).\n", + filename, + ((myEnc && *HTAtom_name(myEnc) == '8') ? "n" : myEnc ? "" : + *HTAtom_name(format) == 'a' ? "n" : ""), + myEnc ? 
HTAtom_name(myEnc) : "", + HTAtom_name(format), + file_csname, + file_cs)); + FREE(Afn); + FREE(Aname); + if (pfile_anchor) + *pfile_anchor = file_anchor; + if (pformat) + *pformat = format; + if (pencoding) + *pencoding = myEnc; + if (pcharset) + *pcharset = file_csname; + if (pfile_cs) + *pfile_cs = file_cs; +} + +/* Determine value from file name. + * ------------------------------- + * + */ +float HTFileValue(const char *filename) +{ + HTSuffix *suff; + int n; + int i; + int lf = (int) strlen(filename); + +#ifndef NO_INIT + if (!HTSuffixes) + HTFileInit(); +#endif /* !NO_INIT */ + n = HTList_count(HTSuffixes); + for (i = 0; i < n; i++) { + int ls; + + suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i); + ls = (int) strlen(suff->suffix); + if ((ls <= lf) && 0 == strcmp(suff->suffix, filename + lf - ls)) { + CTRACE((tfp, "File: Value of %s is %.3f\n", + filename, suff->quality)); + return suff->quality; /* OK -- found */ + } + } + return (float) 0.3; /* Dunno! */ +} + +/* + * Determine compression type from file name, by looking at its suffix. + * Sets as side-effect a pointer to the "dot" that begins the suffix. 
+ */ +CompressFileType HTCompressFileType(const char *filename, + const char *dots, + int *rootlen) +{ + CompressFileType result = cftNone; + size_t len = strlen(filename); + const char *ftype = filename + len; + + VMS_DEL_VERSION(filename); + + if ((len > 4) + && !strcasecomp((ftype - 3), "bz2") + && strchr(dots, ftype[-4]) != 0) { + result = cftBzip2; + ftype -= 4; + } else if ((len > 3) + && !strcasecomp((ftype - 2), "gz") + && strchr(dots, ftype[-3]) != 0) { + result = cftGzip; + ftype -= 3; + } else if ((len > 3) + && !strcasecomp((ftype - 2), "zz") + && strchr(dots, ftype[-3]) != 0) { + result = cftDeflate; + ftype -= 3; + } else if ((len > 2) + && !strcmp((ftype - 1), "Z") + && strchr(dots, ftype[-2]) != 0) { + result = cftCompress; + ftype -= 2; + } + + *rootlen = (int) (ftype - filename); + + CTRACE((tfp, "HTCompressFileType(%s) returns %d:%s\n", + filename, (int) result, filename + *rootlen)); + return result; +} + +/* + * Determine expected file-suffix from the compression method. + */ +const char *HTCompressTypeToSuffix(CompressFileType method) +{ + const char *result = ""; + + switch (method) { + default: + case cftNone: + result = ""; + break; + case cftGzip: + result = ".gz"; + break; + case cftCompress: + result = ".Z"; + break; + case cftBzip2: + result = ".bz2"; + break; + case cftDeflate: + result = ".zz"; + break; + } + return result; +} + +/* + * Determine compression encoding from the compression method. + */ +const char *HTCompressTypeToEncoding(CompressFileType method) +{ + const char *result = NULL; + + switch (method) { + default: + case cftNone: + result = NULL; + break; + case cftGzip: + result = "gzip"; + break; + case cftCompress: + result = "compress"; + break; + case cftBzip2: + result = "bzip2"; + break; + case cftDeflate: + result = "deflate"; + break; + } + return result; +} + +/* + * Check if the token from "Content-Encoding" corresponds to a compression + * type. 
RFC 2068 (and cut/paste into RFC 2616) lists these: + * gzip + * compress + * deflate + * as well as "identity" (but that does nothing). + */ +CompressFileType HTEncodingToCompressType(const char *coding) +{ + CompressFileType result = cftNone; + + if (coding == NULL) { + result = cftNone; + } else if (!strcasecomp(coding, "gzip") || + !strcasecomp(coding, "x-gzip")) { + result = cftGzip; + } else if (!strcasecomp(coding, "compress") || + !strcasecomp(coding, "x-compress")) { + result = cftCompress; + } else if (!strcasecomp(coding, "bzip2") || + !strcasecomp(coding, "x-bzip2")) { + result = cftBzip2; + } else if (!strcasecomp(coding, "deflate") || + !strcasecomp(coding, "x-deflate")) { + result = cftDeflate; + } + return result; +} + +CompressFileType HTContentTypeToCompressType(const char *ct) +{ + CompressFileType method = cftNone; + + if (ct == NULL) { + method = cftNone; + } else if (!strncasecomp(ct, "application/gzip", 16) || + !strncasecomp(ct, "application/x-gzip", 18)) { + method = cftGzip; + } else if (!strncasecomp(ct, "application/compress", 20) || + !strncasecomp(ct, "application/x-compress", 22)) { + method = cftCompress; + } else if (!strncasecomp(ct, "application/bzip2", 17) || + !strncasecomp(ct, "application/x-bzip2", 19)) { + method = cftBzip2; + } + return method; +} + +/* + * Check the anchor's content_type and content_encoding elements for a gzip or + * Unix compressed file -FM, TD + */ +CompressFileType HTContentToCompressType(HTParentAnchor *anchor) +{ + CompressFileType method = cftNone; + const char *ct = HTAnchor_content_type(anchor); + const char *ce = HTAnchor_content_encoding(anchor); + + if (ce == NULL && ct != 0) { + method = HTContentTypeToCompressType(ct); + } else if (ce != 0) { + method = HTEncodingToCompressType(ce); + } + return method; +} + +/* Determine write access to a file. + * --------------------------------- + * + * On exit: + * Returns YES if file can be accessed and can be written to. + * + * Bugs: + * 1. 
No code for non-unix systems. + * 2. Isn't there a quicker way? + */ +BOOL HTEditable(const char *filename GCC_UNUSED) +{ +#ifndef NO_GROUPS + GETGROUPS_T groups[NGROUPS]; + uid_t myUid; + int ngroups; /* The number of groups */ + struct stat fileStatus; + int i; + + if (stat(filename, &fileStatus)) /* Get details of filename */ + return NO; /* Can't even access file! */ + + ngroups = getgroups(NGROUPS, groups); /* Groups to which I belong */ + myUid = geteuid(); /* Get my user identifier */ + + if (TRACE) { + int i2; + + fprintf(tfp, + "File mode is 0%o, uid=%d, gid=%d. My uid=%d, %d groups (", + (unsigned int) fileStatus.st_mode, + (int) fileStatus.st_uid, + (int) fileStatus.st_gid, + (int) myUid, + (int) ngroups); + for (i2 = 0; i2 < ngroups; i2++) + fprintf(tfp, " %d", (int) groups[i2]); + fprintf(tfp, ")\n"); + } + + if (fileStatus.st_mode & 0002) /* I can write anyway? */ + return YES; + + if ((fileStatus.st_mode & 0200) /* I can write my own file? */ + &&(fileStatus.st_uid == myUid)) + return YES; + + if (fileStatus.st_mode & 0020) /* Group I am in can write? */ + { + for (i = 0; i < ngroups; i++) { + if (groups[i] == fileStatus.st_gid) + return YES; + } + } + CTRACE((tfp, "\tFile is not editable.\n")); +#endif /* NO_GROUPS */ + return NO; /* If no excuse, can't do */ +} + +/* Make a save stream. + * ------------------- + * + * The stream must be used for writing back the file. + * @@@ no backup done + */ +HTStream *HTFileSaveStream(HTParentAnchor *anchor) +{ + const char *addr = anchor->address; + char *localname = HTLocalName(addr); + FILE *fp = fopen(localname, BIN_W); + + FREE(localname); + if (!fp) + return NULL; + + return HTFWriter_new(fp); +} + +/* Output one directory entry. 
+ * --------------------------- + */ +void HTDirEntry(HTStructured * target, const char *tail, + const char *entry) +{ + char *relative = NULL; + char *stripped = NULL; + char *escaped = NULL; + int len; + + StrAllocCopy(escaped, entry); + LYTrimPathSep(escaped); + if (strcmp(escaped, "..") != 0) { + stripped = escaped; + escaped = HTEscape(stripped, URL_XPALPHAS); + if (((len = (int) strlen(escaped)) > 2) && + escaped[(len - 3)] == '%' && + escaped[(len - 2)] == '2' && + TOUPPER(escaped[(len - 1)]) == 'F') { + escaped[(len - 3)] = '\0'; + } + } + + if (isEmpty(tail)) { + /* + * Handle extra slash at end of path. + */ + HTStartAnchor(target, NULL, (escaped[0] != '\0' ? escaped : "/")); + } else { + /* + * If empty tail, gives absolute ref below. + */ + relative = 0; + HTSprintf0(&relative, "%s%s%s", + tail, + (*escaped != '\0' ? "/" : ""), + escaped); + HTStartAnchor(target, NULL, relative); + FREE(relative); + } + FREE(stripped); + FREE(escaped); +} + +static BOOL view_structured(HTFormat format_out) +{ + BOOL result = FALSE; + +#ifdef USE_PRETTYSRC + if (psrc_view + || (format_out == HTAtom_for("www/dump"))) + result = TRUE; +#else + if (format_out == WWW_SOURCE) + result = TRUE; +#endif + return result; +} + +/* + * Write a DOCTYPE to the given stream if we happen to want to see the + * source view, or are dumping source. This is not needed when the source + * is not visible, since the document is rendered from a HTStructured object. + */ +void HTStructured_doctype(HTStructured * target, HTFormat format_out) +{ + if (view_structured(format_out)) + PUTS("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"); +} + +void HTStructured_meta(HTStructured * target, HTFormat format_out) +{ + if (view_structured(format_out)) + PUTS("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n"); +} +/* Output parent directory entry. 
+ * ------------------------------ + * + * This gives the TITLE and H1 header, and also a link + * to the parent directory if appropriate. + * + * On exit: + * Returns TRUE if an "Up to <parent>" link was not created + * for a readable local directory because LONG_LIST is defined + * and NO_PARENT_DIR_REFERENCE is not defined, so that the + * calling function should use LYListFmtParse() to create a link + * to the parent directory. Otherwise, it returns FALSE. - FM + */ +BOOL HTDirTitles(HTStructured * target, HTParentAnchor *anchor, + HTFormat format_out, + int tildeIsTop) +{ + const char *logical = anchor->address; + char *path = HTParse(logical, "", PARSE_PATH + PARSE_PUNCTUATION); + char *current; + char *cp = NULL; + BOOL need_parent_link = FALSE; + int i; + +#if defined(USE_DOS_DRIVES) + BOOL local_link = (strlen(logical) > 18 + && !strncasecomp(logical, "file://localhost/", 17) + && LYIsDosDrive(logical + 17)); + BOOL is_remote = !local_link; + +#else +#define is_remote TRUE +#endif + + /* + * Check tildeIsTop for treating home directory as Welcome (assume the + * tilde is not followed by a username). - FM + */ + if (tildeIsTop && !StrNCmp(path, "/~", 2)) { + if (path[2] == '\0') { + path[1] = '\0'; + } else { + for (i = 0; path[(i + 2)]; i++) { + path[i] = path[(i + 2)]; + } + path[i] = '\0'; + } + } + + /* + * Trim out the ;type= parameter, if present. - FM + */ + if ((cp = strrchr(path, ';')) != NULL) { + if (!strncasecomp((cp + 1), "type=", 5)) { + if (TOUPPER(*(cp + 6)) == 'D' || + TOUPPER(*(cp + 6)) == 'A' || + TOUPPER(*(cp + 6)) == 'I') + *cp = '\0'; + } + cp = NULL; + } + current = LYPathLeaf(path); /* last part or "" */ + + { + char *printable = NULL; + +#ifdef DIRED_SUPPORT + printable = HTURLPath_toFile(((!strncasecomp(path, "/%2F", 4)) /* "//" ? */ + ? 
(path + 1) + : path), + TRUE, + is_remote); + if (0 == strncasecomp(printable, "/vmsysu:", 8) || + 0 == strncasecomp(printable, "/anonymou.", 10)) { + StrAllocCopy(cp, (printable + 1)); + StrAllocCopy(printable, cp); + FREE(cp); + } +#else + StrAllocCopy(printable, current); + HTUnEscape(printable); +#endif /* DIRED_SUPPORT */ + + HTStructured_doctype(target, format_out); + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + PUTS(*printable ? printable : WELCOME_MSG); + PUTS(SEGMENT_DIRECTORY); + END(HTML_TITLE); + PUTC('\n'); + HTStructured_meta(target, format_out); + END(HTML_HEAD); + PUTC('\n'); + + START(HTML_BODY); + PUTC('\n'); + +#ifdef DIRED_SUPPORT + START(HTML_H2); + PUTS(*printable ? SEGMENT_CURRENT_DIR : ""); + PUTS(*printable ? printable : WELCOME_MSG); + END(HTML_H2); + PUTC('\n'); +#else + START(HTML_H1); + PUTS(*printable ? printable : WELCOME_MSG); + END(HTML_H1); + PUTC('\n'); +#endif /* DIRED_SUPPORT */ + if (((0 == strncasecomp(printable, "vmsysu:", 7)) && + (cp = strchr(printable, '.')) != NULL && + strchr(cp, '/') == NULL) || + (0 == strncasecomp(printable, "anonymou.", 9) && + strchr(printable, '/') == NULL)) { + FREE(printable); + FREE(path); + return (need_parent_link); + } + FREE(printable); + } + +#ifndef NO_PARENT_DIR_REFERENCE + /* + * Make link back to parent directory. 
+ */ + if (current - path > 0 + && LYIsPathSep(current[-1]) + && current[0] != '\0') { /* was a slash AND something else too */ + char *parent = NULL; + char *relative = NULL; + + current[-1] = '\0'; + parent = strrchr(path, '/'); /* penultimate slash */ + + if ((parent && + (!strcmp(parent, "/..") || + !strncasecomp(parent, "/%2F", 4))) || + !strncasecomp(current, "%2F", 3)) { + FREE(path); + return (need_parent_link); + } + + relative = 0; + HTSprintf0(&relative, "%s/..", current); + +#if defined(DOSPATH) || defined(__EMX__) + if (local_link) { + if (parent != 0 && strlen(parent) == 3) { + StrAllocCat(relative, "/."); + } + } else +#endif + +#if !defined (VMS) + { + /* + * On Unix, if it's not ftp and the directory cannot be read, don't + * put out a link. + * + * On VMS, this problem is dealt with internally by + * HTVMSBrowseDir(). + */ + DIR *dp = NULL; + + if (LYisLocalFile(logical)) { + /* + * We need an absolute file path for the opendir. We also need + * to unescape for this test. Don't worry about %2F now, they + * presumably have been dealt with above, and shouldn't appear + * for local files anyway... Assume OS / filesystem will just + * ignore superfluous slashes. - KW + */ + char *fullparentpath = NULL; + + /* + * Path has been shortened above. + */ + StrAllocCopy(fullparentpath, *path ? path : "/"); + + /* + * Guard against weirdness. 
+ */ + if (0 == strcmp(current, "..")) { + StrAllocCat(fullparentpath, "/../.."); + } else if (0 == strcmp(current, ".")) { + StrAllocCat(fullparentpath, "/.."); + } + + HTUnEscape(fullparentpath); + if ((dp = opendir(fullparentpath)) == NULL) { + FREE(fullparentpath); + FREE(relative); + FREE(path); + return (need_parent_link); + } + closedir(dp); + FREE(fullparentpath); +#ifdef LONG_LIST + need_parent_link = TRUE; + FREE(path); + FREE(relative); + return (need_parent_link); +#endif /* LONG_LIST */ + } + } +#endif /* !VMS */ + HTStartAnchor(target, "", relative); + FREE(relative); + + PUTS(SEGMENT_UP_TO); + if (parent) { + if ((0 == strcmp(current, ".")) || + (0 == strcmp(current, ".."))) { + /* + * Should not happen, but if it does, at least avoid giving + * misleading info. - KW + */ + PUTS(".."); + } else { + char *printable = NULL; + + StrAllocCopy(printable, parent + 1); + HTUnEscape(printable); + PUTS(printable); + FREE(printable); + } + } else { + PUTC('/'); + } + END(HTML_A); + PUTC('\n'); + } +#endif /* !NO_PARENT_DIR_REFERENCE */ + + FREE(path); + return (need_parent_link); +} + +#if defined HAVE_READDIR +/* Send README file. + * ----------------- + * + * If a README file exists, then it is inserted into the document here. + */ +static void do_readme(HTStructured * target, const char *localname) +{ + FILE *fp; + char *readme_file_name = NULL; + int ch; + + HTSprintf0(&readme_file_name, "%s/%s", localname, HT_DIR_README_FILE); + + fp = fopen(readme_file_name, "r"); + + if (fp) { + START(HTML_PRE); + while ((ch = fgetc(fp)) != EOF) { + PUTC((char) ch); + } + END(HTML_PRE); + HTDisplayPartial(); + fclose(fp); + } + FREE(readme_file_name); +} + +#define DIRED_BLOK(obj) (((DIRED *)(obj))->sort_tags) +#define DIRED_NAME(obj) (((DIRED *)(obj))->file_name) + +#define NM_cmp(a,b) ((a) < (b) ? -1 : ((a) > (b) ? 
1 : 0)) + +#if defined(LONG_LIST) && defined(DIRED_SUPPORT) +static const char *file_type(const char *path) +{ + const char *type; + + while (*path == '.') + ++path; + type = strchr(path, '.'); + if (type == NULL) + type = ""; + return type; +} +#endif /* LONG_LIST && DIRED_SUPPORT */ + +static int dired_cmp(void *a, void *b) +{ + DIRED *p = (DIRED *) a; + DIRED *q = (DIRED *) b; + int code = p->sort_tags - q->sort_tags; + +#if defined(LONG_LIST) && defined(DIRED_SUPPORT) + if (code == 0) { + switch (dir_list_order) { + case ORDER_BY_SIZE: + code = -NM_cmp(p->file_info.st_size, q->file_info.st_size); + break; + case ORDER_BY_DATE: + code = -NM_cmp(p->file_info.st_mtime, q->file_info.st_mtime); + break; + case ORDER_BY_MODE: + code = NM_cmp(p->file_info.st_mode, q->file_info.st_mode); + break; + case ORDER_BY_USER: + code = NM_cmp(p->file_info.st_uid, q->file_info.st_uid); + break; + case ORDER_BY_GROUP: + code = NM_cmp(p->file_info.st_gid, q->file_info.st_gid); + break; + case ORDER_BY_TYPE: + code = AS_cmp(file_type(p->file_name), file_type(q->file_name)); + break; + default: + code = 0; + break; + } + } +#endif /* LONG_LIST && DIRED_SUPPORT */ + if (code == 0) + code = AS_cmp(p->file_name, q->file_name); +#if 0 + CTRACE((tfp, "dired_cmp(%d) ->%d\n\t%c:%s (%s)\n\t%c:%s (%s)\n", + dir_list_order, + code, + p->sort_tags, p->file_name, file_type(p->file_name), + q->sort_tags, q->file_name, file_type(q->file_name))); +#endif + return code; +} + +static int print_local_dir(DIR *dp, char *localname, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + HTStructured *target; /* HTML object */ + HTBTree *bt; + HTStructuredClass targetClass; + STRUCT_DIRENT *dirbuf; + char *pathname = NULL; + char *tail = NULL; + const char *p; + char *tmpfilename = NULL; + BOOL need_parent_link = FALSE; + BOOL preformatted = FALSE; + int status; + struct stat *actual_info; + +#ifdef DISP_PARTIAL + int num_of_entries = 0; /* lines counter */ +#endif + +#ifdef S_IFLNK + 
struct stat link_info; +#endif + + CTRACE((tfp, "print_local_dir() started\n")); + + pathname = HTParse(anchor->address, "", + PARSE_PATH + PARSE_PUNCTUATION); + + if ((p = strrchr(pathname, '/')) == NULL) + p = "/"; + StrAllocCopy(tail, (p + 1)); + FREE(pathname); + + if (UCLYhndl_HTFile_for_unspec >= 0) { + HTAnchor_setUCInfoStage(anchor, + UCLYhndl_HTFile_for_unspec, + UCT_STAGE_PARSER, + UCT_SETBY_DEFAULT); + } + + target = HTML_new(anchor, format_out, sink); + targetClass = *target->isa; /* Copy routine entry points */ + + /* + * The need_parent_link flag will be set if an "Up to <parent>" link was + * not created for a readable parent in HTDirTitles() because LONG_LIST is + * defined and NO_PARENT_DIR_REFERENCE is not defined so that need we to + * create the link via an LYListFmtParse() call. - FM + */ + need_parent_link = HTDirTitles(target, anchor, format_out, FALSE); + +#ifdef DIRED_SUPPORT + if (!isLYNXCGI(anchor->address)) { + HTAnchor_setFormat(anchor, WWW_DIRED); + lynx_edit_mode = TRUE; + } +#endif /* DIRED_SUPPORT */ + if (HTDirReadme == HT_DIR_README_TOP) + do_readme(target, localname); + + bt = HTBTree_new(dired_cmp); + + _HTProgress(READING_DIRECTORY); + status = HT_LOADED; /* assume we don't get interrupted */ + while ((dirbuf = readdir(dp)) != NULL) { + /* + * While there are directory entries to be read... + */ + DIRED *data = NULL; + +#ifdef STRUCT_DIRENT__D_INO + if (dirbuf->d_ino == 0) + /* + * If the entry is not being used, skip it. + */ + continue; +#endif + /* + * Skip self, parent if handled in HTDirTitles() or if + * NO_PARENT_DIR_REFERENCE is not defined, and any dot files if + * no_dotfiles is set or show_dotfiles is not set. - FM + */ + if (!strcmp(dirbuf->d_name, ".") /* self */ || + (!strcmp(dirbuf->d_name, "..") /* parent */ && + need_parent_link == FALSE) || + ((strcmp(dirbuf->d_name, "..")) && + (dirbuf->d_name[0] == '.' 
&& + (no_dotfiles || !show_dotfiles)))) + continue; + + StrAllocCopy(tmpfilename, localname); + /* + * If filename is not root directory, add trailing separator. + */ + LYAddPathSep(&tmpfilename); + + StrAllocCat(tmpfilename, dirbuf->d_name); + data = (DIRED *) malloc(sizeof(DIRED) + strlen(dirbuf->d_name) + 4); + if (data == NULL) { + status = HT_PARTIAL_CONTENT; + break; + } + LYTrimPathSep(tmpfilename); + + actual_info = &(data->file_info); +#ifdef S_IFLNK + if (lstat(tmpfilename, actual_info) < 0) { + actual_info->st_mode = 0; + } else { + if (S_ISLNK(actual_info->st_mode)) { + actual_info = &link_info; + if (stat(tmpfilename, actual_info) < 0) + actual_info->st_mode = 0; + } + } +#else + if (stat(tmpfilename, actual_info) < 0) + actual_info->st_mode = 0; +#endif + + strcpy(data->file_name, dirbuf->d_name); +#ifndef DIRED_SUPPORT + if (S_ISDIR(actual_info->st_mode)) { + data->sort_tags = 'D'; + } else { + data->sort_tags = 'F'; + /* D & F to have first directories, then files */ + } +#else + if (S_ISDIR(actual_info->st_mode)) { + if (dir_list_style == MIXED_STYLE) { + data->sort_tags = ' '; + LYAddPathSep0(data->file_name); + } else if (!strcmp(dirbuf->d_name, "..")) { + data->sort_tags = 'A'; + } else { + data->sort_tags = 'D'; + } + } else if (dir_list_style == MIXED_STYLE) { + data->sort_tags = ' '; + } else if (dir_list_style == FILES_FIRST) { + data->sort_tags = 'C'; + /* C & D to have first files, then directories */ + } else { + data->sort_tags = 'F'; + } +#endif /* !DIRED_SUPPORT */ + /* + * Sort dirname in the tree bt. + */ + HTBTree_add(bt, data); + +#ifdef DISP_PARTIAL + /* optimize for expensive operation: */ + if (num_of_entries % (partial_threshold > 0 ? 
+ partial_threshold : display_lines) == 0) { + if (HTCheckForInterrupt()) { + status = HT_PARTIAL_CONTENT; + break; + } + } + num_of_entries++; +#endif /* DISP_PARTIAL */ + + } /* end while directory entries left to read */ + + if (status != HT_PARTIAL_CONTENT) + _HTProgress(OPERATION_OK); + else + CTRACE((tfp, "Reading the directory interrupted by user\n")); + + /* + * Run through tree printing out in order. + */ + { + HTBTElement *next_element = HTBTree_next(bt, NULL); + + /* pick up the first element of the list */ + int num_of_entries_output = 0; /* lines counter */ + + char state; + + /* I for initial (.. file), + D for directory file, + F for file */ + +#ifdef DIRED_SUPPORT + char test; +#endif /* DIRED_SUPPORT */ + state = 'I'; + + while (next_element != NULL) { + DIRED *entry; + +#ifndef DISP_PARTIAL + if (num_of_entries_output % HTMAX(display_lines, 10) == 0) { + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + status = HT_PARTIAL_CONTENT; + break; + } + } +#endif + StrAllocCopy(tmpfilename, localname); + /* + * If filename is not root directory. + */ + LYAddPathSep(&tmpfilename); + + entry = (DIRED *) (HTBTree_object(next_element)); + /* + * Append the current entry's filename to the path. + */ + StrAllocCat(tmpfilename, entry->file_name); + HTSimplify(tmpfilename); + /* + * Output the directory entry. + */ + if (strcmp(DIRED_NAME(HTBTree_object(next_element)), "..")) { +#ifdef DIRED_SUPPORT + test = + (char) (DIRED_BLOK(HTBTree_object(next_element)) + == 'D' ? 'D' : 'F'); + if (state != test) { +#ifndef LONG_LIST + if (dir_list_style == FILES_FIRST) { + if (state == 'F') { + END(HTML_DIR); + PUTC('\n'); + } + } else if (dir_list_style != MIXED_STYLE) + if (state == 'D') { + END(HTML_DIR); + PUTC('\n'); + } +#endif /* !LONG_LIST */ + state = + (char) (DIRED_BLOK(HTBTree_object(next_element)) + == 'D' ? 
'D' : 'F'); + if (preformatted) { + END(HTML_PRE); + PUTC('\n'); + preformatted = FALSE; + } + START(HTML_H2); + if (dir_list_style != MIXED_STYLE) { + START(HTML_EM); + PUTS(state == 'D' + ? LABEL_SUBDIRECTORIES + : LABEL_FILES); + END(HTML_EM); + } + END(HTML_H2); + PUTC('\n'); +#ifndef LONG_LIST + START(HTML_DIR); + PUTC('\n'); +#endif /* !LONG_LIST */ + } +#else + if (state != DIRED_BLOK(HTBTree_object(next_element))) { +#ifndef LONG_LIST + if (state == 'D') { + END(HTML_DIR); + PUTC('\n'); + } +#endif /* !LONG_LIST */ + state = + (char) (DIRED_BLOK(HTBTree_object(next_element)) + == 'D' ? 'D' : 'F'); + if (preformatted) { + END(HTML_PRE); + PUTC('\n'); + preformatted = FALSE; + } + START(HTML_H2); + START(HTML_EM); + PUTS(state == 'D' + ? LABEL_SUBDIRECTORIES + : LABEL_FILES); + END(HTML_EM); + END(HTML_H2); + PUTC('\n'); +#ifndef LONG_LIST + START(HTML_DIR); + PUTC('\n'); +#endif /* !LONG_LIST */ + } +#endif /* DIRED_SUPPORT */ +#ifndef LONG_LIST + START(HTML_LI); +#endif /* !LONG_LIST */ + } + if (!preformatted) { + START(HTML_PRE); + PUTC('\n'); + preformatted = TRUE; + } +#ifdef LONG_LIST + LYListFmtParse(list_format, entry, tmpfilename, target, tail); +#else + HTDirEntry(target, tail, entry->file_name); + PUTS(entry->file_name); + END(HTML_A); + MAYBE_END(HTML_LI); + PUTC('\n'); +#endif /* LONG_LIST */ + + next_element = HTBTree_next(bt, next_element); + /* pick up the next element of the list; + if none, return NULL */ + + /* optimize for expensive operation: */ +#ifdef DISP_PARTIAL + if (num_of_entries_output % + ((partial_threshold > 0) + ? partial_threshold + : display_lines) == 0) { + /* num_of_entries, num_of_entries_output... 
#ifndef VMS
/*
 * stat() wrapper used for local files.
 *
 * If `filename' ends with a path separator, a "." is appended before calling
 * stat(), so that the directory itself is examined.  With _WINDOWS, a failed
 * stat() on a name that access() can nevertheless see is reported as a
 * directory.
 *
 * Returns the stat() result (0 on success, -1 on failure); `data' is filled
 * in on success.
 */
int HTStat(const char *filename,
           struct stat *data)
{
    int result;
    size_t len = strlen(filename);
    char *dotted = NULL;
    const char *target = filename;

    if (len != 0 && LYIsPathSep(filename[len - 1])) {
        HTSprintf0(&dotted, "%s.", filename);
        if (dotted == NULL)
            return -1;          /* could not build the temporary name */
        target = dotted;
    }

    result = stat(target, data);
#ifdef _WINDOWS
    /*
     * Someone claims that stat() doesn't give the proper result for a
     * directory on Windows.
     */
    if (result == -1
        && access(target, 0) == 0) {
        /* do not hand back uninitialised fields next to the faked mode */
        memset(data, 0, sizeof(*data));
        data->st_mode = S_IFDIR;
        result = 0;
    }
#endif
    FREE(dotted);
    return result;
}
#endif

#if defined(USE_ZLIB) || defined(USE_BZLIB)
/*
 * Read `needed' bytes from the current position of `fp' into `buffer', then
 * seek back to where the stream was.
 *
 * Returns TRUE only if the position could be determined, all bytes were
 * read, and the stream was successfully repositioned.  If the position
 * cannot be determined nothing is read, so the stream is left untouched.
 */
static BOOL sniffStream(FILE *fp, char *buffer, size_t needed)
{
    long offset = ftell(fp);
    BOOL result;

    if (offset < 0) {
        CTRACE((tfp, "error seeking in stream\n"));
        return FALSE;
    }

    result = (BOOL) (fread(buffer, sizeof(char), needed, fp) == needed);

    if (fseek(fp, offset, SEEK_SET) < 0) {
        CTRACE((tfp, "error seeking in stream\n"));
        result = FALSE;
    }
    return result;
}
#endif

#ifdef USE_ZLIB
/*
 * Check whether `fp' is positioned at a gzip header: three bytes must be
 * readable and the first two must be the magic number 037 0213.  The stream
 * position is restored by sniffStream().
 */
static BOOL isGzipStream(FILE *fp)
{
    char buffer[3];
    BOOL result;

    if (sniffStream(fp, buffer, sizeof(buffer))
        && !MemCmp(buffer, "\037\213", sizeof(buffer) - 1)) {
        result = TRUE;
    } else {
        CTRACE((tfp, "not a gzip-stream\n"));
        result = FALSE;
    }
    return result;
}

/*
 * Check whether `fp' is positioned at a zlib ("deflate") stream header.
 *
 * The two header bytes CMF and FLG are validated as described in RFC 1950:
 * compression method 8 (deflate), window-size code at most 7, no preset
 * dictionary, and (CMF * 256 + FLG) a multiple of 31.  This accepts the
 * common 0x78 0x9C header as well as the headers written for other
 * compression levels.  As before, three bytes must be readable.
 */
static BOOL isDeflateStream(FILE *fp)
{
    char buffer[3];
    BOOL result = FALSE;

    if (sniffStream(fp, buffer, sizeof(buffer))) {
        unsigned cmf = (unsigned char) buffer[0];
        unsigned flg = (unsigned char) buffer[1];

        if ((cmf & 0x0f) == 8   /* CM: deflate */
            && (cmf >> 4) <= 7  /* CINFO: window size code */
            && (flg & 0x20) == 0        /* FDICT: preset dictionary unsupported */
            && ((cmf << 8) | flg) % 31 == 0) {  /* FCHECK */
            result = TRUE;
        }
    }
    if (!result) {
        CTRACE((tfp, "not a deflate-stream\n"));
    }
    return result;
}
#endif

#ifdef USE_BZLIB
/*
 * Check whether `fp' is positioned at a bzip2 header: six bytes must be
 * readable, starting with "BZh" followed by two digit characters.  The
 * stream position is restored by sniffStream().
 */
static BOOL isBzip2Stream(FILE *fp)
{
    char buffer[6];
    BOOL result;

    if (sniffStream(fp, buffer, sizeof(buffer))
        && !MemCmp(buffer, "BZh", 3)
        && isdigit(UCH(buffer[3]))
        && isdigit(UCH(buffer[4]))) {
        result = TRUE;
    } else {
        CTRACE((tfp, "not a bzip2-stream\n"));
        result = FALSE;
    }
    return result;
}
#endif
+#endif + +static int decompressAndParse(HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink, + char *nodename GCC_UNUSED, + char *filename, + HTAtom *myEncoding, + HTFormat format, + int *statusp) +{ + HTAtom *encoding = 0; + +#ifdef USE_ZLIB + FILE *zzfp = 0; + gzFile gzfp = 0; +#endif /* USE_ZLIB */ +#ifdef USE_BZLIB + BZFILE *bzfp = 0; +#endif /* USE_ZLIB */ +#if defined(USE_ZLIB) || defined(USE_BZLIB) + CompressFileType internal_decompress = cftNone; + BOOL failed_decompress = NO; +#endif + int rootlen = 0; + char *localname = filename; + int bin; + FILE *fp; + +#ifdef VMS + /* + * Assume that the file is in Unix-style syntax if it contains a '/' after + * the leading one. @@ + */ + localname = (strchr(localname + 1, '/') + ? HTVMS_name(nodename, localname) + : localname + 1); +#endif /* VMS */ + + bin = HTCompressFileType(filename, ".", &rootlen) != cftNone; + fp = fopen(localname, FOPEN_MODE(bin)); + +#ifdef VMS + /* + * If the file wasn't VMS syntax, then perhaps it is Ultrix. + */ + if (!fp) { + char *ultrixname = 0; + + CTRACE((tfp, "HTLoadFile: Can't open as %s\n", localname)); + HTSprintf0(&ultrixname, "%s::\"%s\"", nodename, filename); + fp = fopen(ultrixname, FOPEN_MODE(bin)); + if (!fp) { + CTRACE((tfp, "HTLoadFile: Can't open as %s\n", ultrixname)); + } + FREE(ultrixname); + } +#endif /* VMS */ + CTRACE((tfp, "HTLoadFile: Opening `%s' gives %p\n", localname, (void *) fp)); + if (fp) { /* Good! */ + if (HTEditable(localname)) { + HTAtom *put = HTAtom_for("PUT"); + HTList *methods = HTAnchor_methods(anchor); + + if (HTList_indexOf(methods, put) == (-1)) { + HTList_addObject(methods, put); + } + } + /* + * Fake a Content-Encoding for compressed files. - FM + */ + if (!IsUnityEnc(myEncoding)) { + /* + * We already know from the call to HTFileFormat that + * this is a compressed file, no need to look at the filename + * again. 
- kw + */ +#if defined(USE_ZLIB) || defined(USE_BZLIB) + CompressFileType method = HTEncodingToCompressType(HTAtom_name(myEncoding)); +#endif + +#define isDOWNLOAD(m) (strcmp(format_out->name, "www/download") && (method == m)) +#ifdef USE_ZLIB + if (isDOWNLOAD(cftGzip)) { + if (isGzipStream(fp)) { + fclose(fp); + gzfp = gzopen(localname, BIN_R); + + CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n", + localname, gzfp)); + } + internal_decompress = cftGzip; + } else if (isDOWNLOAD(cftDeflate)) { + if (isDeflateStream(fp)) { + zzfp = fp; + fp = 0; + + CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n", + localname, (void *) zzfp)); + } + internal_decompress = cftDeflate; + } else +#endif /* USE_ZLIB */ +#ifdef USE_BZLIB + if (isDOWNLOAD(cftBzip2)) { + if (isBzip2Stream(fp)) { + fclose(fp); + bzfp = BZ2_bzopen(localname, BIN_R); + + CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n", + localname, bzfp)); + } + internal_decompress = cftBzip2; + } else +#endif /* USE_BZLIB */ + { + StrAllocCopy(anchor->content_type, format->name); + StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding)); + format = HTAtom_for("www/compressed"); + } + } else { + CompressFileType cft = HTCompressFileType(localname, DOT_STRING, &rootlen); + + if (cft != cftNone) { + char *cp = NULL; + + StrAllocCopy(cp, localname); + cp[rootlen] = '\0'; + format = HTFileFormat(cp, &encoding, NULL); + FREE(cp); + format = HTCharsetFormat(format, anchor, + UCLYhndl_HTFile_for_unspec); + StrAllocCopy(anchor->content_type, format->name); + } + + switch (cft) { + case cftCompress: + StrAllocCopy(anchor->content_encoding, "x-compress"); + format = HTAtom_for("www/compressed"); + break; + case cftDeflate: + StrAllocCopy(anchor->content_encoding, "x-deflate"); +#ifdef USE_ZLIB + if (strcmp(format_out->name, "www/download") != 0) { + if (isDeflateStream(fp)) { + zzfp = fp; + fp = 0; + + CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n", + localname, (void *) zzfp)); + } + internal_decompress = 
cftDeflate; + } +#else /* USE_ZLIB */ + format = HTAtom_for("www/compressed"); +#endif /* USE_ZLIB */ + break; + case cftGzip: + StrAllocCopy(anchor->content_encoding, "x-gzip"); +#ifdef USE_ZLIB + if (strcmp(format_out->name, "www/download") != 0) { + if (isGzipStream(fp)) { + fclose(fp); + gzfp = gzopen(localname, BIN_R); + + CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n", + localname, gzfp)); + } + internal_decompress = cftGzip; + } +#else /* USE_ZLIB */ + format = HTAtom_for("www/compressed"); +#endif /* USE_ZLIB */ + break; + case cftBzip2: + StrAllocCopy(anchor->content_encoding, "x-bzip2"); +#ifdef USE_BZLIB + if (strcmp(format_out->name, "www/download") != 0) { + if (isBzip2Stream(fp)) { + fclose(fp); + bzfp = BZ2_bzopen(localname, BIN_R); + + CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n", + localname, bzfp)); + } + internal_decompress = cftBzip2; + } +#else /* USE_BZLIB */ + format = HTAtom_for("www/compressed"); +#endif /* USE_BZLIB */ + break; + case cftNone: + break; + } + } +#if defined(USE_ZLIB) || defined(USE_BZLIB) + if (internal_decompress != cftNone) { + switch (internal_decompress) { +#ifdef USE_ZLIB + case cftDeflate: + failed_decompress = (BOOLEAN) (zzfp == NULL); + break; + case cftCompress: + case cftGzip: + failed_decompress = (BOOLEAN) (gzfp == NULL); + break; +#endif +#ifdef USE_BZLIB + case cftBzip2: + failed_decompress = (BOOLEAN) (bzfp == NULL); + break; +#endif + default: + failed_decompress = YES; + break; + } + if (failed_decompress) { + *statusp = HTLoadError(NULL, + -(HT_ERROR), + FAILED_OPEN_COMPRESSED_FILE); + } else { + char *sugfname = NULL; + + if (anchor->SugFname) { + StrAllocCopy(sugfname, anchor->SugFname); + } else { + char *anchor_path = HTParse(anchor->address, "", + PARSE_PATH + PARSE_PUNCTUATION); + char *lastslash; + + HTUnEscape(anchor_path); + lastslash = strrchr(anchor_path, '/'); + if (lastslash) + StrAllocCopy(sugfname, lastslash + 1); + FREE(anchor_path); + } + FREE(anchor->content_encoding); + if 
(sugfname && *sugfname) + HTCheckFnameForCompression(&sugfname, anchor, + TRUE); + if (sugfname && *sugfname) + StrAllocCopy(anchor->SugFname, sugfname); + FREE(sugfname); +#ifdef USE_BZLIB + if (bzfp) + *statusp = HTParseBzFile(format, format_out, + anchor, + bzfp, sink); +#endif +#ifdef USE_ZLIB + if (gzfp) + *statusp = HTParseGzFile(format, format_out, + anchor, + gzfp, sink); + else if (zzfp) + *statusp = HTParseZzFile(format, format_out, + anchor, + zzfp, sink); +#endif + } + } else +#endif /* USE_ZLIB || USE_BZLIB */ + { + *statusp = HTParseFile(format, format_out, anchor, fp, sink); + fclose(fp); + } + return TRUE; + } /* If successful open */ + return FALSE; +} + +/* Load a document. + * ---------------- + * + * On entry: + * addr must point to the fully qualified hypertext reference. + * This is the physical address of the file + * + * On exit: + * returns <0 Error has occurred. + * HTLOADED OK + * + */ +int HTLoadFile(const char *addr, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + char *filename = NULL; + char *acc_method = NULL; + HTFormat format; + char *nodename = NULL; + char *newname = NULL; /* Simplified name of file */ + HTAtom *myEncoding = NULL; /* enc of this file, may be gzip etc. */ + int status = -1; + +#ifndef DISABLE_FTP + char *ftp_newhost; +#endif + +#ifdef VMS + struct stat stat_info; +#endif /* VMS */ + + /* + * Reduce the filename to a basic form (hopefully unique!). + */ + StrAllocCopy(newname, addr); + filename = HTParse(newname, "", PARSE_PATH | PARSE_PUNCTUATION); + nodename = HTParse(newname, "", PARSE_HOST); + + /* + * If access is ftp, or file is on another host, invoke ftp now. 
+ */ + acc_method = HTParse(newname, "", PARSE_ACCESS); + if (strcmp("ftp", acc_method) == 0 || + (!LYSameHostname("localhost", nodename) && + !LYSameHostname(nodename, HTHostName()))) { + status = -1; + FREE(newname); + FREE(filename); + FREE(nodename); + FREE(acc_method); +#ifndef DISABLE_FTP + ftp_newhost = HTParse(addr, "", PARSE_HOST); + if (strcmp(ftp_lasthost, ftp_newhost)) + ftp_local_passive = ftp_passive; + + status = HTFTPLoad(addr, anchor, format_out, sink); + + if (ftp_passive == ftp_local_passive) { + if ((status >= 400) || (status < 0)) { + ftp_local_passive = (BOOLEAN) !ftp_passive; + status = HTFTPLoad(addr, anchor, format_out, sink); + } + } + + free(ftp_lasthost); + ftp_lasthost = ftp_newhost; +#endif /* DISABLE_FTP */ + return status; + } else { + FREE(newname); + FREE(acc_method); + } +#if defined(VMS) || defined(USE_DOS_DRIVES) + HTUnEscape(filename); +#endif /* VMS */ + + /* + * Determine the format and encoding mapped to any suffix. + */ + if (anchor->content_type && anchor->content_encoding) { + /* + * If content_type and content_encoding are BOTH already set in the + * anchor object, we believe it and don't try to derive format and + * encoding from the filename. - kw + */ + format = HTAtom_for(anchor->content_type); + myEncoding = HTAtom_for(anchor->content_encoding); + } else { + int default_UCLYhndl = UCLYhndl_HTFile_for_unspec; + + if (force_old_UCLYhndl_on_reload) { + force_old_UCLYhndl_on_reload = FALSE; + default_UCLYhndl = forced_UCLYhdnl; + } + + format = HTFileFormat(filename, &myEncoding, NULL); + + /* + * Check the format for an extended MIME charset value, and act on it + * if present. Otherwise, assume what is indicated by the last + * parameter (fallback will effectively be UCLYhndl_for_unspec, by + * default ISO-8859-1). - kw + */ + format = HTCharsetFormat(format, anchor, default_UCLYhndl); + } + +#ifdef VMS + /* + * Check to see if the 'filename' is in fact a directory. 
If it is create + * a new hypertext object containing a list of files and subdirectories + * contained in the directory. All of these are links to the directories + * or files listed. + */ + if (HTStat(filename, &stat_info) == -1) { + CTRACE((tfp, "HTLoadFile: Can't stat %s\n", filename)); + } else { + if (S_ISDIR(stat_info.st_mode)) { + if (HTDirAccess == HT_DIR_FORBID) { + FREE(filename); + FREE(nodename); + return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN); + } + + if (HTDirAccess == HT_DIR_SELECTIVE) { + char *enable_file_name = NULL; + + HTSprintf0(&enable_file_name, "%s/%s", filename, HT_DIR_ENABLE_FILE); + if (HTStat(enable_file_name, &stat_info) == -1) { + FREE(filename); + FREE(nodename); + FREE(enable_file_name); + return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS); + } + } + + FREE(filename); + FREE(nodename); + return HTVMSBrowseDir(addr, anchor, format_out, sink); + } + } + + if (decompressAndParse(anchor, + format_out, + sink, + nodename, + filename, + myEncoding, + format, + &status)) { + FREE(nodename); + FREE(filename); + return status; + } + FREE(filename); + +#else /* not VMS: */ + + FREE(filename); + + /* + * For unix, we try to translate the name into the name of a transparently + * mounted file. + * + * Not allowed in secure (HTClientHost) situations. TBL 921019 + */ +#ifndef NO_UNIX_IO + /* Need protection here for telnet server but not httpd server. */ + + if (!HTSecure) { /* try local file system */ + char *localname = HTLocalName(addr); + struct stat dir_info; + +#ifdef HAVE_READDIR + /* + * Multiformat handling. + * + * If needed, scan directory to find a good file. Bug: We don't stat + * the file to find the length. 
+ */ + if ((strlen(localname) > strlen(MULTI_SUFFIX)) && + (0 == strcmp(localname + strlen(localname) - strlen(MULTI_SUFFIX), + MULTI_SUFFIX))) { + DIR *dp = 0; + BOOL forget_multi = NO; + + STRUCT_DIRENT *dirbuf; + float best = (float) NO_VALUE_FOUND; /* So far best is bad */ + HTFormat best_rep = NULL; /* Set when rep found */ + HTAtom *best_enc = NULL; + char *best_name = NULL; /* Best dir entry so far */ + + char *base = strrchr(localname, '/'); + size_t baselen = 0; + + if (!base || base == localname) { + forget_multi = YES; + } else { + *base++ = '\0'; /* Just got directory name */ + baselen = strlen(base) - strlen(MULTI_SUFFIX); + base[baselen] = '\0'; /* Chop off suffix */ + + dp = opendir(localname); + } + if (forget_multi || !dp) { + FREE(localname); + FREE(nodename); + return HTLoadError(sink, 500, FAILED_DIR_SCAN); + } + + while ((dirbuf = readdir(dp)) != NULL) { + /* + * While there are directory entries to be read... + */ +#ifdef STRUCT_DIRENT__D_INO + if (dirbuf->d_ino == 0) + continue; /* if the entry is not being used, skip it */ +#endif + if (strlen(dirbuf->d_name) > baselen && /* Match? 
*/ + !StrNCmp(dirbuf->d_name, base, baselen)) { + HTAtom *enc; + HTFormat rep = HTFileFormat(dirbuf->d_name, &enc, NULL); + float filevalue = HTFileValue(dirbuf->d_name); + float value = HTStackValue(rep, format_out, + filevalue, + 0L /* @@@@@@ */ ); + + if (value <= 0.0) { + int rootlen = 0; + const char *atomname = NULL; + CompressFileType cft = + HTCompressFileType(dirbuf->d_name, ".", &rootlen); + char *cp = NULL; + + enc = NULL; + if (cft != cftNone) { + StrAllocCopy(cp, dirbuf->d_name); + cp[rootlen] = '\0'; + format = HTFileFormat(cp, NULL, NULL); + FREE(cp); + value = HTStackValue(format, format_out, + filevalue, 0L); + switch (cft) { + case cftCompress: + atomname = "application/x-compressed"; + break; + case cftGzip: + atomname = "application/x-gzip"; + break; + case cftDeflate: + atomname = "application/x-deflate"; + break; + case cftBzip2: + atomname = "application/x-bzip2"; + break; + case cftNone: + break; + } + } + + if (atomname != NULL) { + value = HTStackValue(format, format_out, + filevalue, 0L); + if (value <= 0.0) { + format = HTAtom_for(atomname); + value = HTStackValue(format, format_out, + filevalue, 0L); + } + if (value <= 0.0) { + format = HTAtom_for("www/compressed"); + value = HTStackValue(format, format_out, + filevalue, 0L); + } + } + } + if (value < NO_VALUE_FOUND) { + CTRACE((tfp, + "HTLoadFile: value of presenting %s is %f\n", + HTAtom_name(rep), value)); + if (value > best) { + best_rep = rep; + best_enc = enc; + best = value; + StrAllocCopy(best_name, dirbuf->d_name); + } + } /* if best so far */ + } + /* if match */ + } /* end while directory entries left to read */ + closedir(dp); + + if (best_rep) { + format = best_rep; + myEncoding = best_enc; + base[-1] = '/'; /* Restore directory name */ + base[0] = '\0'; + StrAllocCat(localname, best_name); + FREE(best_name); + } else { /* If not found suitable file */ + FREE(localname); + FREE(nodename); + return HTLoadError(sink, 403, FAILED_NO_REPRESENTATION); + } + /*NOTREACHED */ + } + 
/* if multi suffix */ + /* + * Check to see if the 'localname' is in fact a directory. If it is + * create a new hypertext object containing a list of files and + * subdirectories contained in the directory. All of these are links + * to the directories or files listed. NB This assumes the existence + * of a type 'STRUCT_DIRENT', which will hold the directory entry, and + * a type 'DIR' which is used to point to the current directory being + * read. + */ +#if defined(USE_DOS_DRIVES) + if (strlen(localname) == 2 && LYIsDosDrive(localname)) + LYAddPathSep(&localname); +#endif + if (HTStat(localname, &dir_info) == -1) /* get file information */ + { + /* if can't read file information */ + CTRACE((tfp, "HTLoadFile: can't stat %s\n", localname)); + + } else { /* Stat was OK */ + + if (S_ISDIR(dir_info.st_mode)) { + /* + * If localname is a directory. + */ + DIR *dp; + struct stat file_info; + + CTRACE((tfp, "%s is a directory\n", localname)); + + /* + * Check directory access. Selective access means only those + * directories containing a marker file can be browsed. + */ + if (HTDirAccess == HT_DIR_FORBID) { + FREE(localname); + FREE(nodename); + return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN); + } + + if (HTDirAccess == HT_DIR_SELECTIVE) { + char *enable_file_name = NULL; + + HTSprintf0(&enable_file_name, "%s/%s", localname, HT_DIR_ENABLE_FILE); + if (stat(enable_file_name, &file_info) != 0) { + FREE(localname); + FREE(nodename); + FREE(enable_file_name); + return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS); + } + } + + CTRACE((tfp, "Opening directory %s\n", localname)); + dp = opendir(localname); + if (!dp) { + FREE(localname); + FREE(nodename); + return HTLoadError(sink, 403, FAILED_DIR_UNREADABLE); + } + + /* + * Directory access is allowed and possible. 
+ */ + + status = print_local_dir(dp, localname, + anchor, format_out, sink); + closedir(dp); + FREE(localname); + FREE(nodename); + return status; /* document loaded, maybe partial */ + + } + /* end if localname is a directory */ + if (S_ISREG(dir_info.st_mode)) { +#ifdef LONG_MAX + if (dir_info.st_size <= LONG_MAX) +#endif + anchor->content_length = (long) dir_info.st_size; + } + + } /* end if file stat worked */ + +/* End of directory reading section +*/ +#endif /* HAVE_READDIR */ + if (decompressAndParse(anchor, + format_out, + sink, + nodename, + localname, + myEncoding, + format, + &status)) { + FREE(nodename); + FREE(localname); + return status; + } + FREE(localname); + } /* local unix file system */ +#endif /* !NO_UNIX_IO */ +#endif /* VMS */ + +#ifndef DECNET + /* + * Now, as transparently mounted access has failed, we try FTP. + */ + { + /* + * Deal with case-sensitivity differences on VMS versus Unix. + */ +#ifdef VMS + if (strcasecomp(nodename, HTHostName()) != 0) +#else + if (strcmp(nodename, HTHostName()) != 0) +#endif /* VMS */ + { + status = -1; + FREE(nodename); + if (StrNCmp(addr, "file://localhost", 16)) { + /* never go to ftp site when URL + * is file://localhost + */ +#ifndef DISABLE_FTP + status = HTFTPLoad(addr, anchor, format_out, sink); +#endif /* DISABLE_FTP */ + } + return status; + } + FREE(nodename); + } +#endif /* !DECNET */ + + /* + * All attempts have failed. + */ + { + CTRACE((tfp, "Can't open `%s', errno=%d\n", addr, SOCKET_ERRNO)); + + return HTLoadError(sink, 403, FAILED_FILE_UNREADABLE); + } +} + +static const char *program_paths[pp_Last]; + +/* + * Given a program number, return its path + */ +const char *HTGetProgramPath(ProgramPaths code) +{ + const char *result = NULL; + + if (code > ppUnknown && code < pp_Last) + result = program_paths[code]; + return result; +} + +/* + * Store a program's path. The caller must allocate the string used for 'path', + * since HTInitProgramPaths() may free it. 
+ */ +void HTSetProgramPath(ProgramPaths code, const char *path) +{ + if (code > ppUnknown && code < pp_Last) { + program_paths[code] = isEmpty(path) ? 0 : path; + } +} + +/* + * Reset the list of known program paths to the ones that are compiled-in + */ +void HTInitProgramPaths(BOOL init) +{ + ProgramPaths code; + int n; + const char *path; + const char *test; + + for (n = (int) ppUnknown + 1; n < (int) pp_Last; ++n) { + switch (code = (ProgramPaths) n) { +#ifdef BZIP2_PATH + case ppBZIP2: + path = BZIP2_PATH; + break; +#endif +#ifdef CHMOD_PATH + case ppCHMOD: + path = CHMOD_PATH; + break; +#endif +#ifdef COMPRESS_PATH + case ppCOMPRESS: + path = COMPRESS_PATH; + break; +#endif +#ifdef COPY_PATH + case ppCOPY: + path = COPY_PATH; + break; +#endif +#ifdef CSWING_PATH + case ppCSWING: + path = CSWING_PATH; + break; +#endif +#ifdef GZIP_PATH + case ppGZIP: + path = GZIP_PATH; + break; +#endif +#ifdef INFLATE_PATH + case ppINFLATE: + path = INFLATE_PATH; + break; +#endif +#ifdef INSTALL_PATH + case ppINSTALL: + path = INSTALL_PATH; + break; +#endif +#ifdef MKDIR_PATH + case ppMKDIR: + path = MKDIR_PATH; + break; +#endif +#ifdef MV_PATH + case ppMV: + path = MV_PATH; + break; +#endif +#ifdef RLOGIN_PATH + case ppRLOGIN: + path = RLOGIN_PATH; + break; +#endif +#ifdef RM_PATH + case ppRM: + path = RM_PATH; + break; +#endif +#ifdef RMDIR_PATH + case ppRMDIR: + path = RMDIR_PATH; + break; +#endif +#ifdef SETFONT_PATH + case ppSETFONT: + path = SETFONT_PATH; + break; +#endif +#ifdef TAR_PATH + case ppTAR: + path = TAR_PATH; + break; +#endif +#ifdef TELNET_PATH + case ppTELNET: + path = TELNET_PATH; + break; +#endif +#ifdef TN3270_PATH + case ppTN3270: + path = TN3270_PATH; + break; +#endif +#ifdef TOUCH_PATH + case ppTOUCH: + path = TOUCH_PATH; + break; +#endif +#ifdef UNCOMPRESS_PATH + case ppUNCOMPRESS: + path = UNCOMPRESS_PATH; + break; +#endif +#ifdef UNZIP_PATH + case ppUNZIP: + path = UNZIP_PATH; + break; +#endif +#ifdef UUDECODE_PATH + case ppUUDECODE: + path = 
UUDECODE_PATH; + break; +#endif +#ifdef ZCAT_PATH + case ppZCAT: + path = ZCAT_PATH; + break; +#endif +#ifdef ZIP_PATH + case ppZIP: + path = ZIP_PATH; + break; +#endif + default: + path = NULL; + break; + } + test = HTGetProgramPath(code); + if (test != NULL && test != path) { + free((char *) test); + } + if (init) { + HTSetProgramPath(code, path); + } + } +} + +/* + * Protocol descriptors + */ +#ifdef GLOBALDEF_IS_MACRO +#define _HTFILE_C_1_INIT { "ftp", HTLoadFile, 0 } +GLOBALDEF(HTProtocol, HTFTP, _HTFILE_C_1_INIT); +#define _HTFILE_C_2_INIT { "file", HTLoadFile, HTFileSaveStream } +GLOBALDEF(HTProtocol, HTFile, _HTFILE_C_2_INIT); +#else +GLOBALDEF HTProtocol HTFTP = +{"ftp", HTLoadFile, 0}; +GLOBALDEF HTProtocol HTFile = +{"file", HTLoadFile, HTFileSaveStream}; +#endif /* GLOBALDEF_IS_MACRO */ diff --git a/WWW/Library/Implementation/HTFile.h b/WWW/Library/Implementation/HTFile.h new file mode 100644 index 00000000..937907d4 --- /dev/null +++ b/WWW/Library/Implementation/HTFile.h @@ -0,0 +1,366 @@ +/* + * $LynxId: HTFile.h,v 1.33 2012/02/10 00:59:15 tom Exp $ + * File access in libwww + * FILE ACCESS + * + * These are routines for local file access used by WWW browsers and servers. + * Implemented by HTFile.c. + * + * If the file is not a local file, then we pass it on to HTFTP in case it + * can be reached by FTP. + */ +#ifndef HTFILE_H +#define HTFILE_H + +#include <HTFormat.h> +#include <HTAccess.h> + +#ifndef HTML_H +#include <HTML.h> /* SCW */ +#endif /* HTML_H */ + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Controlling globals + * + * These flags control how directories and files are represented as + * hypertext, and are typically set by the application from command + * line options, etc. 
+ */ extern int HTDirAccess; + /* Directory access level */ + +#define HT_DIR_FORBID 0 /* Altogether forbidden */ +#define HT_DIR_SELECTIVE 1 /* If HT_DIR_ENABLE_FILE exists */ +#define HT_DIR_OK 2 /* Any accesible directory */ + +#define HT_DIR_ENABLE_FILE ".www_browsable" /* If exists, can browse */ + + extern int HTDirReadme; /* Include readme files in listing? */ + + /* Values: */ +#define HT_DIR_README_NONE 0 /* No */ +#define HT_DIR_README_TOP 1 /* Yes, first */ +#define HT_DIR_README_BOTTOM 2 /* Yes, at the end */ + +#define HT_DIR_README_FILE "README" + +/* + * Convert filenames between local and WWW formats + */ + extern char *HTURLPath_toFile(const char *name, int expand_all, int is_remote); + extern char *HTnameOfFile_WWW(const char *name, int WWW_prefix, int expand_all); + +#define HTLocalName(name) HTnameOfFile_WWW(name,TRUE,TRUE) +#define HTfullURL_toFile(name) HTnameOfFile_WWW(name,FALSE,TRUE) +#define HTpartURL_toFile(name) HTnameOfFile_WWW(name,FALSE,FALSE) + +/* + * Make a WWW name from a full local path name + */ + extern char *WWW_nameOfFile(const char *name); + +/* + * Generate the name of a cache file + */ + extern char *HTCacheFileName(const char *name); + +/* + * Generate fragments of HTML for source-view: + */ + extern void HTStructured_doctype(HTStructured * target, HTFormat format_out); + + extern void HTStructured_meta(HTStructured * target, HTFormat format_out); +/* + * Output directory titles + * + * This is (like the next one) used by HTFTP. It is common code to generate + * the title and heading 1 and the parent directory link for any anchor. + * + * changed to return TRUE if parent directory link was generated, + * FALSE otherwise - KW + */ + extern BOOL HTDirTitles(HTStructured * target, HTParentAnchor *anchor, + HTFormat format_out, + int tildeIsTop); + +/* + * Check existence. + */ + extern int HTStat(const char *filename, + struct stat *data); + +/* Load a document. 
+ * ---------------- + */ + extern int HTLoadFile(const char *addr, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink); + +/* + * Output a directory entry + * + * This is used by HTFTP.c for example -- it is a common routine for + * generating a linked directory entry. + */ + extern void HTDirEntry(HTStructured * target, /* in which to put the linked text */ const char *tail, /* last part of directory name */ + const char *entry); /* name of this entry */ + +/* + * HTSetSuffix: Define the representation for a file suffix + * + * This defines a mapping between local file suffixes and file content + * types and encodings. + * + * ON ENTRY, + * + * suffix includes the "." if that is important (normally, yes!) + * + * representation is MIME-style content-type + * + * encoding is MIME-style content-transfer-encoding + * (8bit, 7bit, etc) or HTTP-style content-encoding + * (gzip, compress etc.) + * + * quality an a priori judgement of the quality of such files + * (0.0..1.0) + * + * HTSetSuffix5 has one more parameter for a short description of the type + * which is otherwise derived from the representation: + * + * desc is a short textual description, or NULL + * + * Examples: HTSetSuffix(".ps", "application/postscript", "8bit", 1.0); + * Examples: HTSetSuffix(".psz", "application/postscript", "gzip", 1.0); + * A MIME type could also indicate a non-trivial encoding on its own + * ("application/x-compressed-tar"), but in that case don't use enconding + * to also indicate it but use "binary" etc. + */ + extern void HTSetSuffix5(const char *suffix, + const char *representation, + const char *encoding, + const char *desc, + double quality); + +#define HTSetSuffix(suff,rep,enc,q) HTSetSuffix5(suff, rep, enc, NULL, q) + +/* + * HTFileFormat: Get Representation and Encoding from file name. + * + * ON EXIT, + * + * return The represntation it imagines the file is in. + * + * *pEncoding The encoding (binary, 7bit, etc). See HTSetSuffix. 
+ */ + extern HTFormat HTFileFormat(const char *filename, + HTAtom **pEncoding, + const char **pDesc); + +/* + * HTCharsetFormat: Revise the file format in relation to the Lynx charset. + * + * This checks the format associated with an anchor for + * for an extended MIME Content-Type, and if a charset is + * indicated, sets Lynx up for proper handling in relation + * to the currently selected character set. - FM + */ + extern HTFormat HTCharsetFormat(HTFormat format, + HTParentAnchor *anchor, + int default_LYhndl); + +/* Get various pieces of meta info from file name. + * ----------------------------------------------- + * + * LYGetFileInfo fills in information that can be determined without + * an actual (new) access to the filesystem, based on current suffix + * and character set configuration. If the file has been loaded and + * parsed before (with the same URL generated here!) and the anchor + * is still around, some results may be influenced by that (in + * particular, charset info from a META tag - this is not actually + * tested!). + * The caller should not keep pointers to the returned objects around + * for too long, the valid lifetimes vary. In particular, the returned + * charset string should be copied if necessary. If return of the + * file_anchor is requested, that one can be used to retrieve + * additional bits of info that are stored in the anchor object and + * are not covered here; as usual, don't keep pointers to the + * file_anchor longer than necessary since the object may disappear + * through HTuncache_current_document or at the next document load. + * - kw + */ + extern void LYGetFileInfo(const char *filename, + HTParentAnchor **pfile_anchor, + HTFormat *pformat, + HTAtom **pencoding, + const char **pdesc, + const char **pcharset, + int *pfile_cs); + +/* + * Determine file value from file name. + */ + extern float HTFileValue(const char *filename); + +/* + * Known compression types. 
+ */ + typedef enum { + cftNone + ,cftCompress + ,cftGzip + ,cftBzip2 + ,cftDeflate + } CompressFileType; + +/* + * Determine compression type from file name, by looking at its suffix. + */ + extern CompressFileType HTCompressFileType(const char *filename, + const char *dots, + int *rootlen); + +/* + * Determine compression type from the content-encoding. + */ + extern CompressFileType HTEncodingToCompressType(const char *encoding); +/* + * Determine compression type from the content-encoding. + */ + extern CompressFileType HTContentTypeToCompressType(const char *ct); +/* + * Determine compression type from the content-type and/or content-encoding. + */ + extern CompressFileType HTContentToCompressType(HTParentAnchor *anchor); +/* + * Determine compression encoding from the compression method. + */ + extern const char *HTCompressTypeToEncoding(CompressFileType method); +/* + * Determine expected file-suffix from the compression method. + */ + extern const char *HTCompressTypeToSuffix(CompressFileType method); +/* + * Determine write access to a file. + * + * ON EXIT, + * + * return value YES if file can be accessed and can be written to. + * + * BUGS + * + * Isn't there a quicker way? + */ + +#if defined(HAVE_CONFIG_H) + +#ifndef HAVE_GETGROUPS +#define NO_GROUPS +#endif + +#else + +#ifdef VMS +#define NO_GROUPS +#endif /* VMS */ +#ifdef NO_UNIX_IO +#define NO_GROUPS +#endif /* NO_UNIX_IO */ +#ifdef PCNFS +#define NO_GROUPS +#endif /* PCNFS */ +#ifdef NOUSERS +#define NO_GROUPS +#endif /* PCNFS */ + +#endif /* HAVE_CONFIG_H */ + + extern BOOL HTEditable(const char *filename); + +/* Make a save stream. + * ------------------- + */ + extern HTStream *HTFileSaveStream(HTParentAnchor *anchor); + +/* + * Determine a suitable suffix, given the representation. + * + * ON ENTRY, + * + * rep is the atomized MIME style representation + * enc is an encoding (8bit, binary, gzip, compress,..) 
+ * + * ON EXIT, + * + * returns a pointer to a suitable suffix string if one has + * been found, else NULL. + */ + extern const char *HTFileSuffix(HTAtom *rep, + const char *enc); + +/* + * Enumerate external programs that lynx may assume exists. Unlike those + * given in download scripts, etc., lynx would really like to know their + * absolute paths, for better security. + */ + typedef enum { + ppUnknown = 0 + ,ppBZIP2 + ,ppCHMOD + ,ppCOMPRESS + ,ppCOPY + ,ppCSWING + ,ppGZIP + ,ppINFLATE + ,ppINSTALL + ,ppMKDIR + ,ppMV + ,ppRLOGIN + ,ppRM + ,ppRMDIR + ,ppSETFONT + ,ppTAR + ,ppTELNET + ,ppTN3270 + ,ppTOUCH + ,ppUNCOMPRESS + ,ppUNZIP + ,ppUUDECODE + ,ppZCAT + ,ppZIP + ,pp_Last + } ProgramPaths; + +/* + * Given a program number, return its path + */ + extern const char *HTGetProgramPath(ProgramPaths code); + +/* + * Store a program's path + */ + extern void HTSetProgramPath(ProgramPaths code, + const char *path); + +/* + * Reset the list of known program paths to the ones that are compiled-in + */ + extern void HTInitProgramPaths(BOOL init); + +/* + * The Protocols + */ +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTFTP); + extern GLOBALREF (HTProtocol, HTFile); + +#else + GLOBALREF HTProtocol HTFTP, HTFile; +#endif /* GLOBALREF_IS_MACRO */ + +#ifdef __cplusplus +} +#endif +#endif /* HTFILE_H */ diff --git a/WWW/Library/Implementation/HTFinger.c b/WWW/Library/Implementation/HTFinger.c new file mode 100644 index 00000000..0dfc7a68 --- /dev/null +++ b/WWW/Library/Implementation/HTFinger.c @@ -0,0 +1,422 @@ +/* + * $LynxId: HTFinger.c,v 1.29 2011/05/24 09:21:13 tom Exp $ + * + * FINGER ACCESS HTFinger.c + * ============= + * Authors: + * ARB Andrew Brooks + * + * History: + * 21 Apr 94 First version (ARB, from HTNews.c by TBL) + * 12 Mar 96 Made the URL and command buffering secure from + * stack modifications, beautified the HTLoadFinger() + * and response() functions, and added support for the + * following URL formats for sending a "", "/w", + * 
"username[@host]", or "/w username[@host]" command + * to the server: + * finger://host + * finger://host/ + * finger://host/%2fw + * finger://host/%2fw%20username[@host] + * finger://host/w/username[@host] + * finger://host/username[@host] + * finger://host/username[@host]/w + * finger://username@host + * finger://username@host/ + * finger://username@host/w + * 15 Mar 96 Added support for port 79 gtype 0 gopher URLs + * relayed from HTLoadGopher. - FM + */ + +#include <HTUtils.h> + +#ifndef DISABLE_FINGER + +#include <HTAlert.h> +#include <HTML.h> +#include <HTParse.h> +#include <HTFormat.h> +#include <HTTCP.h> +#include <HTString.h> +#include <HTFinger.h> + +#include <LYUtils.h> +#include <LYLeaks.h> + +#define FINGER_PORT 79 /* See rfc742 */ +#define BIG 1024 /* Bug */ + +#define PUTC(c) (*targetClass.put_character)(target, c) +#define PUTS(s) (*targetClass.put_string)(target, s) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) +#define END(e) (*targetClass.end_element)(target, e, 0) +#define FREE_TARGET (*targetClass._free)(target) +#define NEXT_CHAR HTGetCharacter() + +/* Module-wide variables +*/ +static int finger_fd; /* Socket for FingerHost */ + +struct _HTStructured { + const HTStructuredClass *isa; /* For gopher streams */ + /* ... 
*/ +}; + +static HTStructured *target; /* The output sink */ +static HTStructuredClass targetClass; /* Copy of fn addresses */ + +/* Initialisation for this module + * ------------------------------ + */ +static BOOL initialized = NO; +static BOOL initialize(void) +{ + finger_fd = -1; /* Disconnected */ + return YES; +} + +/* Start anchor element + * -------------------- + */ +static void start_anchor(const char *href) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + + { + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = (BOOL) (i == HTML_A_HREF); + } + ((const char **) value)[HTML_A_HREF] = href; + (*targetClass.start_element) (target, HTML_A, present, + (const char **) value, -1, 0); + +} + +/* Send Finger Command line to remote host & Check Response + * -------------------------------------------------------- + * + * On entry, + * command points to the command to be sent, including CRLF, or is null + * pointer if no command to be sent. + * On exit, + * Negative status indicates transmission error, socket closed. + * Positive status is a Finger status. + */ + +static int response(char *command, + char *sitename, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +{ + int status; + int length = (int) strlen(command); + int ch, i; + char line[BIG], *l, *cmd = NULL; + char *p = line, *href = NULL; + + if (length == 0) + return (-1); + + /* Set up buffering. + */ + HTInitInput(finger_fd); + + /* Send the command. + */ + CTRACE((tfp, "HTFinger command to be sent: %s", command)); + status = (int) NETWRITE(finger_fd, (char *) command, (unsigned) length); + if (status < 0) { + CTRACE((tfp, "HTFinger: Unable to send command. Disconnecting.\n")); + NETCLOSE(finger_fd); + finger_fd = -1; + return status; + } + /* if bad status */ + /* Make a hypertext object with an anchor list. 
+ */ + target = HTML_new(anAnchor, format_out, sink); + targetClass = *target->isa; /* Copy routine entry points */ + + /* Create the results report. + */ + CTRACE((tfp, "HTFinger: Reading finger information\n")); + START(HTML_HTML); + PUTC('\n'); + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + PUTS("Finger server on "); + PUTS(sitename); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_BODY); + PUTC('\n'); + START(HTML_H1); + PUTS("Finger server on "); + START(HTML_EM); + PUTS(sitename); + END(HTML_EM); + PUTS(": "); + if (command) { + StrAllocCopy(cmd, command); + } else { + StrAllocCopy(cmd, ""); + } + for (i = ((int) strlen(cmd) - 1); i >= 0; i--) { + if (cmd[i] == LF || cmd[i] == CR) { + cmd[i] = '\0'; + } else { + break; + } + } + PUTS(cmd); + FREE(cmd); + END(HTML_H1); + PUTC('\n'); + START(HTML_PRE); + + while ((ch = NEXT_CHAR) != EOF) { + + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTFinger: Interrupted in HTGetCharacter, apparently.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + goto end_html; + } + + if (ch != LF) { + *p = (char) ch; /* Put character in line */ + if (p < &line[BIG - 1]) { + p++; + } + } else { + *p = '\0'; /* Terminate line */ + /* + * OK we now have a line. + * Load it as 'l' and parse it. 
+ */ + p = l = line; + while (*l) { + if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) && + StrNCmp(l, "snews://", 8) && + StrNCmp(l, "nntp://", 7) && + StrNCmp(l, "snewspost:", 10) && + StrNCmp(l, "snewsreply:", 11) && + StrNCmp(l, "newspost:", 9) && + StrNCmp(l, "newsreply:", 10) && + StrNCmp(l, "ftp://", 6) && + StrNCmp(l, "file:/", 6) && + StrNCmp(l, "finger://", 9) && + StrNCmp(l, "http://", 7) && + StrNCmp(l, "https://", 8) && + StrNCmp(l, "wais://", 7) && + StrNCmp(l, STR_MAILTO_URL, LEN_MAILTO_URL) && + StrNCmp(l, "cso://", 6) && + StrNCmp(l, "gopher://", 9)) + PUTC(*l++); + else { + StrAllocCopy(href, l); + start_anchor(strtok(href, " \r\n\t,>)\"")); + while (*l && !strchr(" \r\n\t,>)\"", *l)) + PUTC(*l++); + END(HTML_A); + FREE(href); + } + } + PUTC('\n'); + } + } + NETCLOSE(finger_fd); + finger_fd = -1; + + end_html: + END(HTML_PRE); + PUTC('\n'); + END(HTML_BODY); + PUTC('\n'); + END(HTML_HTML); + PUTC('\n'); + FREE_TARGET; + return (0); +} + +/* Load by name HTLoadFinger + * ============ + */ +int HTLoadFinger(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *stream) +{ + static char empty[1]; + + char *username, *sitename; /* Fields extracted from URL */ + char *slash, *at_sign; /* Fields extracted from URL */ + char *command, *str, *param; /* Buffers */ + int port; /* Port number from URL */ + int status; /* tcp return */ + int result = HT_LOADED; + BOOL IsGopherURL = FALSE; + const char *p1 = arg; + + CTRACE((tfp, "HTFinger: Looking for %s\n", (arg ? arg : "NULL"))); + + if (!(arg && *arg)) { + HTAlert(COULD_NOT_LOAD_DATA); + return HT_NOT_LOADED; /* Ignore if no name */ + } + + if (!initialized) + initialized = initialize(); + if (!initialized) { + HTAlert(gettext("Could not set up finger connection.")); + return HT_NOT_LOADED; /* FAIL */ + } + + /* Set up the host and command fields. 
+ */ + if (!strncasecomp(arg, "finger://", 9)) { + p1 = arg + 9; /* Skip "finger://" prefix */ + } else if (!strncasecomp(arg, "gopher://", 9)) { + p1 = arg + 9; /* Skip "gopher://" prefix */ + IsGopherURL = TRUE; + } + + param = 0; + sitename = StrAllocCopy(param, p1); + if (param == 0) { + HTAlert(COULD_NOT_LOAD_DATA); + return HT_NOT_LOADED; + } else if ((slash = strchr(sitename, '/')) != NULL) { + *slash++ = '\0'; + HTUnEscape(slash); + if (IsGopherURL) { + if (*slash != '0') { + HTAlert(COULD_NOT_LOAD_DATA); + return HT_NOT_LOADED; /* FAIL */ + } + *slash++ = '\0'; + } + } + + if ((at_sign = strchr(sitename, '@')) != NULL) { + if (IsGopherURL) { + HTAlert(COULD_NOT_LOAD_DATA); + return HT_NOT_LOADED; /* FAIL */ + } else { + *at_sign++ = '\0'; + username = sitename; + sitename = at_sign; + HTUnEscape(username); + } + } else if (slash) { + username = slash; + } else { + username = empty; + } + + if (*sitename == '\0') { + HTAlert(gettext("Could not load data (no sitename in finger URL)")); + result = HT_NOT_LOADED; /* Ignore if no name */ + } else if (HTParsePort(sitename, &port) != NULL) { + if (port != 79) { + HTAlert(gettext("Invalid port number - will only use port 79!")); + result = HT_NOT_LOADED; /* Ignore if wrong port */ + } + } + + if (result == HT_LOADED) { + /* Load the string for making a connection/ + */ + str = 0; + HTSprintf0(&str, "lose://%s/", sitename); + + /* Load the command for the finger server. 
+ */ + command = 0; + if (at_sign && slash) { + if (*slash == 'w' || *slash == 'W') { + HTSprintf0(&command, "/w %s%c%c", username, CR, LF); + } else { + HTSprintf0(&command, "%s%c%c", username, CR, LF); + } + } else if (at_sign) { + HTSprintf0(&command, "%s%c%c", username, CR, LF); + } else if (*username == '/') { + if ((slash = strchr((username + 1), '/')) != NULL) { + *slash = ' '; + } + HTSprintf0(&command, "%s%c%c", username, CR, LF); + } else if ((*username == 'w' || *username == 'W') && + *(username + 1) == '/') { + if (*username + 2 != '\0') { + *(username + 1) = ' '; + } else { + *(username + 1) = '\0'; + } + HTSprintf0(&command, "/%s%c%c", username, CR, LF); + } else if ((*username == 'w' || *username == 'W') && + *(username + 1) == '\0') { + HTSprintf0(&command, "/%s%c%c", username, CR, LF); + } else if ((slash = strchr(username, '/')) != NULL) { + *slash++ = '\0'; + if (*slash == 'w' || *slash == 'W') { + HTSprintf0(&command, "/w %s%c%c", username, CR, LF); + } else { + HTSprintf0(&command, "%s%c%c", username, CR, LF); + } + } else { + HTSprintf0(&command, "%s%c%c", username, CR, LF); + } + + /* Now, let's get a stream setup up from the FingerHost: + * CONNECTING to finger host + */ + CTRACE((tfp, "HTFinger: doing HTDoConnect on '%s'\n", str)); + status = HTDoConnect(str, "finger", FINGER_PORT, &finger_fd); + CTRACE((tfp, "HTFinger: Done DoConnect; status %d\n", status)); + + if (status == HT_INTERRUPTED) { + /* Interrupt cleanly */ + CTRACE((tfp, + "HTFinger: Interrupted on connect; recovering cleanly.\n")); + HTProgress(CONNECTION_INTERRUPTED); + result = HT_NOT_LOADED; + } else if (status < 0) { + NETCLOSE(finger_fd); + finger_fd = -1; + CTRACE((tfp, "HTFinger: Unable to connect to finger host.\n")); + HTAlert(gettext("Could not access finger host.")); + result = HT_NOT_LOADED; /* FAIL */ + } else { + CTRACE((tfp, "HTFinger: Connected to finger host '%s'.\n", str)); + + /* Send the command, and process response if successful. 
+ */ + if (response(command, sitename, anAnchor, format_out, stream) != 0) { + HTAlert(gettext("No response from finger server.")); + result = HT_NOT_LOADED; + } + } + FREE(str); + FREE(command); + } + FREE(param); + return result; +} + +#ifdef GLOBALDEF_IS_MACRO +#define _HTFINGER_C_1_INIT { "finger", HTLoadFinger, NULL } +GLOBALDEF(HTProtocol, HTFinger, _HTFINGER_C_1_INIT); +#else +GLOBALDEF HTProtocol HTFinger = +{"finger", HTLoadFinger, NULL}; +#endif /* GLOBALDEF_IS_MACRO */ + +#endif /* not DISABLE_FINGER */ diff --git a/WWW/Library/Implementation/HTFinger.h b/WWW/Library/Implementation/HTFinger.h new file mode 100644 index 00000000..071d43bc --- /dev/null +++ b/WWW/Library/Implementation/HTFinger.h @@ -0,0 +1,30 @@ +/* Finger protocol module for the WWW library */ +/* History: + * 21 Apr 94 Andrew Brooks + */ + +#ifndef HTFINGER_H +#define HTFINGER_H + +#include <HTAccess.h> +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTFinger); + +#else + GLOBALREF HTProtocol HTFinger; +#endif /* GLOBALREF_IS_MACRO */ + + extern int HTLoadFinger(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *stream); + +#ifdef __cplusplus +} +#endif +#endif /* HTFINGER_H */ diff --git a/WWW/Library/Implementation/HTFormat.c b/WWW/Library/Implementation/HTFormat.c new file mode 100644 index 00000000..ef574499 --- /dev/null +++ b/WWW/Library/Implementation/HTFormat.c @@ -0,0 +1,1911 @@ +/* + * $LynxId: HTFormat.c,v 1.74 2011/06/11 12:13:09 tom Exp $ + * + * Manage different file formats HTFormat.c + * ============================= + * + * Bugs: + * Not reentrant. + * + * Assumes the incoming stream is ASCII, rather than a local file + * format, and so ALWAYS converts from ASCII on non-ASCII machines. + * Therefore, non-ASCII machines can't read local files. 
+ * + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> + +/* Implements: +*/ +#include <HTFormat.h> + +static float HTMaxSecs = 1e10; /* No effective limit */ + +#ifdef UNIX +#ifdef NeXT +#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n" +#else +#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" + /* Full pathname would be better! */ +#endif /* NeXT */ +#endif /* UNIX */ + +#include <HTML.h> +#include <HTMLDTD.h> +#include <HText.h> +#include <HTAlert.h> +#include <HTList.h> +#include <HTInit.h> +#include <HTTCP.h> +#include <HTTP.h> +/* Streams and structured streams which we use: +*/ +#include <HTFWriter.h> +#include <HTPlain.h> +#include <SGML.h> +#include <HTMLGen.h> + +#include <LYexit.h> +#include <LYUtils.h> +#include <GridText.h> +#include <LYGlobalDefs.h> +#include <LYLeaks.h> + +#ifdef DISP_PARTIAL +#include <LYMainLoop.h> +#endif + +BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */ + +/* this version used by the NetToText stream */ +struct _HTStream { + const HTStreamClass *isa; + BOOL had_cr; + HTStream *sink; +}; + +/* Presentation methods + * -------------------- + */ +HTList *HTPresentations = NULL; +HTPresentation *default_presentation = NULL; + +/* + * To free off the presentation list. 
+ */ +#ifdef LY_FIND_LEAKS +static void HTFreePresentations(void); +#endif + +/* Define a presentation system command for a content-type + * ------------------------------------------------------- + */ +void HTSetPresentation(const char *representation, + const char *command, + const char *testcommand, + double quality, + double secs, + double secs_per_byte, + long int maxbytes, + AcceptMedia media) +{ + HTPresentation *pres = typecalloc(HTPresentation); + + if (pres == NULL) + outofmem(__FILE__, "HTSetPresentation"); + + assert(pres != NULL); + assert(representation != NULL); + + CTRACE2(TRACE_CFG, + (tfp, + "HTSetPresentation rep=%s, command=%s, test=%s, qual=%f\n", + NonNull(representation), + NonNull(command), + NonNull(testcommand), + quality)); + + pres->rep = HTAtom_for(representation); + pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */ + pres->converter = HTSaveAndExecute; /* Fixed for now ... */ + pres->quality = (float) quality; + pres->secs = (float) secs; + pres->secs_per_byte = (float) secs_per_byte; + pres->maxbytes = maxbytes; + pres->get_accept = 0; + pres->accept_opt = media; + + pres->command = NULL; + StrAllocCopy(pres->command, command); + + pres->testcommand = NULL; + StrAllocCopy(pres->testcommand, testcommand); + + /* + * Memory leak fixed. 
+ * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + */ + if (!HTPresentations) { + HTPresentations = HTList_new(); +#ifdef LY_FIND_LEAKS + atexit(HTFreePresentations); +#endif + } + + if (strcmp(representation, "*") == 0) { + FREE(default_presentation); + default_presentation = pres; + } else { + HTList_addObject(HTPresentations, pres); + } +} + +/* Define a built-in function for a content-type + * --------------------------------------------- + */ +void HTSetConversion(const char *representation_in, + const char *representation_out, + HTConverter *converter, + double quality, + double secs, + double secs_per_byte, + long int maxbytes, + AcceptMedia media) +{ + HTPresentation *pres = typecalloc(HTPresentation); + + if (pres == NULL) + outofmem(__FILE__, "HTSetConversion"); + + assert(pres != NULL); + + CTRACE2(TRACE_CFG, + (tfp, + "HTSetConversion rep_in=%s, rep_out=%s, qual=%f\n", + NonNull(representation_in), + NonNull(representation_out), + quality)); + + pres->rep = HTAtom_for(representation_in); + pres->rep_out = HTAtom_for(representation_out); + pres->converter = converter; + pres->command = NULL; + pres->testcommand = NULL; + pres->quality = (float) quality; + pres->secs = (float) secs; + pres->secs_per_byte = (float) secs_per_byte; + pres->maxbytes = maxbytes; + pres->get_accept = TRUE; + pres->accept_opt = media; + + /* + * Memory Leak fixed. + * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + */ + if (!HTPresentations) { + HTPresentations = HTList_new(); +#ifdef LY_FIND_LEAKS + atexit(HTFreePresentations); +#endif + } + + HTList_addObject(HTPresentations, pres); +} + +#ifdef LY_FIND_LEAKS +/* + * Purpose: Free the presentation list. + * Arguments: void + * Return Value: void + * Remarks/Portability/Dependencies/Restrictions: + * Made to clean up Lynx's bad leakage. + * Revision History: + * 05-28-94 created Lynx 2-3-1 Garrett Arch Blythe + */ +static void HTFreePresentations(void) +{ + HTPresentation *pres = NULL; + + /* + * Loop through the list. 
+ */ + while (!HTList_isEmpty(HTPresentations)) { + /* + * Free off each item. May also need to free off it's items, but not + * sure as of yet. + */ + pres = (HTPresentation *) HTList_removeLastObject(HTPresentations); + FREE(pres->command); + FREE(pres->testcommand); + FREE(pres); + } + /* + * Free the list itself. + */ + HTList_delete(HTPresentations); + HTPresentations = NULL; +} +#endif /* LY_FIND_LEAKS */ + +/* File buffering + * -------------- + * + * The input file is read using the macro which can read from + * a socket or a file. + * The input buffer size, if large will give greater efficiency and + * release the server faster, and if small will save space on PCs etc. + */ +#define INPUT_BUFFER_SIZE 4096 /* Tradeoff */ +static char input_buffer[INPUT_BUFFER_SIZE]; +static char *input_pointer; +static char *input_limit; +static int input_file_number; + +/* Set up the buffering + * + * These routines are public because they are in fact needed by + * many parsers, and on PCs and Macs we should not duplicate + * the static buffer area. 
+ */ +void HTInitInput(int file_number) +{ + input_file_number = file_number; + input_pointer = input_limit = input_buffer; +} + +int interrupted_in_htgetcharacter = 0; +int HTGetCharacter(void) +{ + char ch; + + interrupted_in_htgetcharacter = 0; + do { + if (input_pointer >= input_limit) { + int status = NETREAD(input_file_number, + input_buffer, INPUT_BUFFER_SIZE); + + if (status <= 0) { + if (status == 0) + return EOF; + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTFormat: Interrupted in HTGetCharacter\n")); + interrupted_in_htgetcharacter = 1; + return EOF; + } + CTRACE((tfp, "HTFormat: File read error %d\n", status)); + return EOF; /* -1 is returned by UCX + at end of HTTP link */ + } + input_pointer = input_buffer; + input_limit = input_buffer + status; + } + ch = *input_pointer++; + } while (ch == (char) 13); /* Ignore ASCII carriage return */ + + return FROMASCII(UCH(ch)); +} + +#ifdef USE_SSL +int HTGetSSLCharacter(void *handle) +{ + char ch; + + interrupted_in_htgetcharacter = 0; + if (!handle) + return (char) EOF; + do { + if (input_pointer >= input_limit) { + int status = SSL_read((SSL *) handle, + input_buffer, INPUT_BUFFER_SIZE); + + if (status <= 0) { + if (status == 0) + return (char) EOF; + if (status == HT_INTERRUPTED) { + CTRACE((tfp, + "HTFormat: Interrupted in HTGetSSLCharacter\n")); + interrupted_in_htgetcharacter = 1; + return (char) EOF; + } + CTRACE((tfp, "HTFormat: SSL_read error %d\n", status)); + return (char) EOF; /* -1 is returned by UCX + at end of HTTP link */ + } + input_pointer = input_buffer; + input_limit = input_buffer + status; + } + ch = *input_pointer++; + } while (ch == (char) 13); /* Ignore ASCII carriage return */ + + return FROMASCII(ch); +} +#endif /* USE_SSL */ + +/* Match maintype to any MIME type starting with maintype, for example: + * image/gif should match image + */ +static int half_match(char *trial_type, char *target) +{ + char *cp = strchr(trial_type, '/'); + + /* if no '/' or no '*' */ + if (!cp || *(cp 
+ 1) != '*') + return 0; + + CTRACE((tfp, "HTFormat: comparing %s and %s for half match\n", + trial_type, target)); + + /* main type matches */ + if (!StrNCmp(trial_type, target, ((cp - trial_type) - 1))) + return 1; + + return 0; +} + +/* + * Evaluate a deferred mailcap test command, i.e.,. one that substitutes the + * document's charset or other values in %{name} format. + */ +static BOOL failsMailcap(HTPresentation *pres, HTParentAnchor *anchor) +{ + if (pres->testcommand != 0) { + if (LYTestMailcapCommand(pres->testcommand, + anchor->content_type_params) != 0) + return TRUE; + } + return FALSE; +} + +#define WWW_WILDCARD_REP_OUT HTAtom_for("*") + +/* Look up a presentation + * ---------------------- + * + * If fill_in is NULL, only look for an exact match. + * If a wildcard match is made, *fill_in is used to store + * a possibly modified presentation, and a pointer to it is + * returned. For an exact match, a pointer to the presentation + * in the HTPresentations list is returned. Returns NULL if + * nothing found. 
- kw + * + */ +static HTPresentation *HTFindPresentation(HTFormat rep_in, + HTFormat rep_out, + HTPresentation *fill_in, + HTParentAnchor *anchor) +{ + HTAtom *wildcard = NULL; /* = HTAtom_for("*"); lookup when needed - kw */ + int n; + int i; + HTPresentation *pres; + HTPresentation *match; + HTPresentation *strong_wildcard_match = 0; + HTPresentation *weak_wildcard_match = 0; + HTPresentation *last_default_match = 0; + HTPresentation *strong_subtype_wildcard_match = 0; + + CTRACE((tfp, "HTFormat: Looking up presentation for %s to %s\n", + HTAtom_name(rep_in), HTAtom_name(rep_out))); + + n = HTList_count(HTPresentations); + for (i = 0; i < n; i++) { + pres = (HTPresentation *) HTList_objectAt(HTPresentations, i); + if (pres->rep == rep_in) { + if (pres->rep_out == rep_out) { + if (failsMailcap(pres, anchor)) + continue; + CTRACE((tfp, "FindPresentation: found exact match: %s -> %s\n", + HTAtom_name(pres->rep), + HTAtom_name(pres->rep_out))); + return pres; + + } else if (!fill_in) { + continue; + } else { + if (!wildcard) + wildcard = WWW_WILDCARD_REP_OUT; + if (pres->rep_out == wildcard) { + if (failsMailcap(pres, anchor)) + continue; + if (!strong_wildcard_match) + strong_wildcard_match = pres; + /* otherwise use the first one */ + CTRACE((tfp, + "StreamStack: found strong wildcard match: %s -> %s\n", + HTAtom_name(pres->rep), + HTAtom_name(pres->rep_out))); + } + } + + } else if (!fill_in) { + continue; + + } else if (half_match(HTAtom_name(pres->rep), + HTAtom_name(rep_in))) { + if (pres->rep_out == rep_out) { + if (failsMailcap(pres, anchor)) + continue; + if (!strong_subtype_wildcard_match) + strong_subtype_wildcard_match = pres; + /* otherwise use the first one */ + CTRACE((tfp, + "StreamStack: found strong subtype wildcard match: %s -> %s\n", + HTAtom_name(pres->rep), + HTAtom_name(pres->rep_out))); + } + } + + if (pres->rep == WWW_SOURCE) { + if (pres->rep_out == rep_out) { + if (failsMailcap(pres, anchor)) + continue; + if (!weak_wildcard_match) + 
weak_wildcard_match = pres; + /* otherwise use the first one */ + CTRACE((tfp, + "StreamStack: found weak wildcard match: %s\n", + HTAtom_name(pres->rep_out))); + + } else if (!last_default_match) { + if (!wildcard) + wildcard = WWW_WILDCARD_REP_OUT; + if (pres->rep_out == wildcard) { + if (failsMailcap(pres, anchor)) + continue; + last_default_match = pres; + /* otherwise use the first one */ + } + } + } + } + + match = (strong_subtype_wildcard_match + ? strong_subtype_wildcard_match + : (strong_wildcard_match + ? strong_wildcard_match + : (weak_wildcard_match + ? weak_wildcard_match + : last_default_match))); + + if (match) { + *fill_in = *match; /* Specific instance */ + fill_in->rep = rep_in; /* yuk */ + fill_in->rep_out = rep_out; /* yuk */ + return fill_in; + } + + return NULL; +} + +/* Create a filter stack + * --------------------- + * + * If a wildcard match is made, a temporary HTPresentation + * structure is made to hold the destination format while the + * new stack is generated. This is just to pass the out format to + * MIME so far. Storing the format of a stream in the stream might + * be a lot neater. + * + */ +HTStream *HTStreamStack(HTFormat rep_in, + HTFormat rep_out, + HTStream *sink, + HTParentAnchor *anchor) +{ + HTPresentation temp; + HTPresentation *match; + HTStream *result; + + CTRACE((tfp, "HTFormat: Constructing stream stack for %s to %s (%s)\n", + HTAtom_name(rep_in), + HTAtom_name(rep_out), + NONNULL(anchor->content_type_params))); + + /* don't return on WWW_SOURCE some people might like + * to make use of the source!!!! 
LJM + */ +#if 0 + if (rep_out == WWW_SOURCE || rep_out == rep_in) + return sink; /* LJM */ +#endif + + if (rep_out == rep_in) { + result = sink; + + } else if ((match = HTFindPresentation(rep_in, rep_out, &temp, anchor))) { + if (match == &temp) { + CTRACE((tfp, "StreamStack: Using %s\n", HTAtom_name(temp.rep_out))); + } else { + CTRACE((tfp, "StreamStack: found exact match: %s -> %s\n", + HTAtom_name(match->rep), + HTAtom_name(match->rep_out))); + } + result = (*match->converter) (match, anchor, sink); + } else { + result = NULL; + } + if (TRACE) { + if (result && result->isa && result->isa->name) { + CTRACE((tfp, "StreamStack: Returning \"%s\"\n", result->isa->name)); + } else if (result) { + CTRACE((tfp, "StreamStack: Returning *unknown* stream!\n")); + } else { + CTRACE((tfp, "StreamStack: Returning NULL!\n")); + CTRACE_FLUSH(tfp); /* a crash may be imminent... - kw */ + } + } + return result; +} + +/* Put a presentation near start of list + * ------------------------------------- + * + * Look up a presentation (exact match only) and, if found, reorder + * it to the start of the HTPresentations list. - kw + */ +void HTReorderPresentation(HTFormat rep_in, + HTFormat rep_out) +{ + HTPresentation *match; + + if ((match = HTFindPresentation(rep_in, rep_out, NULL, NULL))) { + HTList_removeObject(HTPresentations, match); + HTList_addObject(HTPresentations, match); + } +} + +/* + * Setup 'get_accept' flag to denote presentations that are not redundant, + * and will be listed in "Accept:" header. 
+ */ +void HTFilterPresentations(void) +{ + int i, j; + int n = HTList_count(HTPresentations); + HTPresentation *p, *q; + BOOL matched; + char *s, *t; + + CTRACE((tfp, "HTFilterPresentations (AcceptMedia %#x)\n", LYAcceptMedia)); + for (i = 0; i < n; i++) { + p = (HTPresentation *) HTList_objectAt(HTPresentations, i); + s = HTAtom_name(p->rep); + + p->get_accept = FALSE; + if ((LYAcceptMedia & p->accept_opt) != 0 + && p->rep_out == WWW_PRESENT + && p->rep != WWW_SOURCE + && strcasecomp(s, "www/mime") + && strcasecomp(s, "www/compressed") + && p->quality <= 1.0 && p->quality >= 0.0) { + matched = TRUE; + for (j = 0; j < i; j++) { + q = (HTPresentation *) HTList_objectAt(HTPresentations, j); + t = HTAtom_name(q->rep); + + if (!strcasecomp(s, t)) { + matched = FALSE; + CTRACE((tfp, " match %s %s\n", s, t)); + break; + } + } + p->get_accept = matched; + } + } +} + +/* Find the cost of a filter stack + * ------------------------------- + * + * Must return the cost of the same stack which StreamStack would set up. 
+ * + * On entry, + * length The size of the data to be converted + */ +float HTStackValue(HTFormat rep_in, + HTFormat rep_out, + double initial_value, + long int length) +{ + HTAtom *wildcard = WWW_WILDCARD_REP_OUT; + + CTRACE((tfp, "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n", + HTAtom_name(rep_in), initial_value, HTAtom_name(rep_out))); + + if (rep_out == WWW_SOURCE || rep_out == rep_in) + return 0.0; + + { + int n = HTList_count(HTPresentations); + int i; + HTPresentation *pres; + + for (i = 0; i < n; i++) { + pres = (HTPresentation *) HTList_objectAt(HTPresentations, i); + if (pres->rep == rep_in && + (pres->rep_out == rep_out || pres->rep_out == wildcard)) { + float value = (float) (initial_value * pres->quality); + + if (HTMaxSecs > 0.0) + value = (value + - ((float) length * pres->secs_per_byte + + pres->secs) + / HTMaxSecs); + return value; + } + } + } + + return (float) -1e30; /* Really bad */ + +} + +/* Display the page while transfer in progress + * ------------------------------------------- + * + * Repaint the page only when necessary. + * This is a traverse call for HText_pageDisplay() - it works!. + * + */ +void HTDisplayPartial(void) +{ +#ifdef DISP_PARTIAL + if (display_partial) { + /* + * HText_getNumOfLines() = "current" number of complete lines received + * NumOfLines_partial = number of lines at the moment of last repaint. + * (we update NumOfLines_partial only when we repaint the display.) + * + * display_partial could only be enabled in HText_new() so a new + * HTMainText object available - all HText_ functions use it, lines + * counter HText_getNumOfLines() in particular. 
+ * + * Otherwise HTMainText holds info from the previous document and we + * may repaint it instead of the new one: prev doc scrolled to the + * first line (=Newline_partial) is not good looking :-) 23 Aug 1998 + * Leonid Pauzner + * + * So repaint the page only when necessary: + */ + int Newline_partial = LYGetNewline(); + + if (((Newline_partial + display_lines) - 1 > NumOfLines_partial) + /* current page not complete... */ + && (partial_threshold > 0 ? + ((Newline_partial + partial_threshold) - 1 <= + HText_getNumOfLines()) : + ((Newline_partial + display_lines) - 1 <= HText_getNumOfLines())) + /* + * Originally we rendered by increments of 2 lines, + * but that got annoying on slow network connections. + * Then we switched to full-pages. Now it's configurable. + * If partial_threshold <= 0, then it's a full page + */ + ) { + if (LYMainLoop_pageDisplay(Newline_partial)) + NumOfLines_partial = HText_getNumOfLines(); + } + } +#else /* nothing */ +#endif /* DISP_PARTIAL */ +} + +/* Put this as early as possible, OK just after HTDisplayPartial() */ +void HTFinishDisplayPartial(void) +{ +#ifdef DISP_PARTIAL + /* + * End of incremental rendering stage here. + */ + display_partial = FALSE; +#endif /* DISP_PARTIAL */ +} + +/* Push data from a socket down a stream + * ------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * The file number given is assumed to be a TELNET stream, i.e., containing + * CRLF at the end of lines which need to be stripped to LF for unix + * when the format is textual. + * + * State of socket and target stream on entry: + * socket (file_number) assumed open, + * target (sink) assumed valid. + * + * Return values: + * HT_INTERRUPTED Interruption or error after some data received. + * -2 Unexpected disconnect before any data received. 
+ * -1 Interruption or error before any data received, or + * (UNIX) other read error before any data received, or + * download cancelled. + * HT_LOADED Normal close of socket (end of file indication + * received), or + * unexpected disconnect after some data received, or + * other read error after some data received, or + * (not UNIX) other read error before any data received. + * + * State of socket and target stream on return depends on return value: + * HT_INTERRUPTED socket still open, target aborted. + * -2 socket still open, target stream still valid. + * -1 socket still open, target aborted. + * otherwise socket closed, target stream still valid. + */ +int HTCopy(HTParentAnchor *anchor, + int file_number, + void *handle GCC_UNUSED, + HTStream *sink) +{ + HTStreamClass targetClass; + BOOL suppress_readprogress = NO; + off_t bytes; + int rv = 0; + + /* Push the data down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* + * Push binary from socket down sink + * + * This operation could be put into a main event loop + */ + HTReadProgress(bytes = 0, (off_t) 0); + for (;;) { + int status; + + if (LYCancelDownload) { + LYCancelDownload = FALSE; + (*targetClass._abort) (sink, NULL); + rv = -1; + goto finished; + } + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + (*targetClass._abort) (sink, NULL); + if (bytes) + rv = HT_INTERRUPTED; + else + rv = -1; + goto finished; + } +#ifdef USE_SSL + if (handle) + status = SSL_read((SSL *) handle, input_buffer, INPUT_BUFFER_SIZE); + else + status = NETREAD(file_number, input_buffer, INPUT_BUFFER_SIZE); +#else + status = NETREAD(file_number, input_buffer, INPUT_BUFFER_SIZE); +#endif /* USE_SSL */ + + if (status <= 0) { + if (status == 0) { + break; + } else if (status == HT_INTERRUPTED) { + _HTProgress(TRANSFER_INTERRUPTED); + (*targetClass._abort) (sink, NULL); + if (bytes) + rv = HT_INTERRUPTED; + else + rv = -1; + goto finished; + } else if (SOCKET_ERRNO == 
ENOTCONN || +#ifdef _WINDOWS /* 1997/11/10 (Mon) 16:57:18 */ + SOCKET_ERRNO == ETIMEDOUT || +#endif + SOCKET_ERRNO == ECONNRESET || + SOCKET_ERRNO == EPIPE) { + /* + * Arrrrgh, HTTP 0/1 compatibility problem, maybe. + */ + if (bytes <= 0) { + /* + * Don't have any data, so let the calling function decide + * what to do about it. - FM + */ + rv = -2; + goto finished; + } else { +#ifdef UNIX + /* + * Treat what we've received already as the complete + * transmission, but not without giving the user an alert. + * I don't know about all the different TCP stacks for VMS + * etc., so this is currently only for UNIX. - kw + */ + HTInetStatus("NETREAD"); + HTAlert("Unexpected server disconnect."); + CTRACE((tfp, + "HTCopy: Unexpected server disconnect. Treating as completed.\n")); +#else /* !UNIX */ + /* + * Treat what we've gotten already as the complete + * transmission. - FM + */ + CTRACE((tfp, + "HTCopy: Unexpected server disconnect. Treating as completed.\n")); + status = 0; +#endif /* UNIX */ + } +#ifdef UNIX + } else { /* status < 0 and other errno */ + /* + * Treat what we've received already as the complete + * transmission, but not without giving the user an alert. I + * don't know about all the different TCP stacks for VMS etc., + * so this is currently only for UNIX. - kw + */ + HTInetStatus("NETREAD"); + HTAlert("Unexpected read error."); + if (bytes) { + (void) NETCLOSE(file_number); + rv = HT_LOADED; + } else { + (*targetClass._abort) (sink, NULL); + rv = -1; + } + goto finished; +#endif + } + break; + } + + /* + * Suppress ReadProgress messages when collecting a redirection + * message, at least initially (unless/until anchor->content_type gets + * changed, probably by the MIME message parser). That way messages + * put up by the HTTP module or elsewhere can linger in the statusline + * for a while. 
- kw + */ + suppress_readprogress = (BOOL) (anchor && anchor->content_type && + !strcmp(anchor->content_type, + "message/x-http-redirection")); +#ifdef NOT_ASCII + { + char *p; + + for (p = input_buffer; p < input_buffer + status; p++) { + *p = FROMASCII(*p); + } + } +#endif /* NOT_ASCII */ + + (*targetClass.put_block) (sink, input_buffer, status); + bytes += status; + if (!suppress_readprogress) + HTReadProgress(bytes, (off_t) (anchor ? anchor->content_length : 0)); + HTDisplayPartial(); + + } /* next bufferload */ + + _HTProgress(TRANSFER_COMPLETE); + (void) NETCLOSE(file_number); + rv = HT_LOADED; + + finished: + HTFinishDisplayPartial(); + return (rv); +} + +/* Push data from a file pointer down a stream + * ------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * + * State of file and target stream on entry: + * FILE* (fp) assumed open, + * target (sink) assumed valid. + * + * Return values: + * HT_INTERRUPTED Interruption after some data read. + * HT_PARTIAL_CONTENT Error after some data read. + * -1 Error before any data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always fp still open, target stream still valid. 
+ */ +int HTFileCopy(FILE *fp, HTStream *sink) +{ + HTStreamClass targetClass; + int status; + off_t bytes; + int rv = HT_OK; + + /* Push the data down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* Push binary from socket down sink + */ + HTReadProgress(bytes = 0, (off_t) 0); + for (;;) { + status = (int) fread(input_buffer, + (size_t) 1, + (size_t) INPUT_BUFFER_SIZE, fp); + if (status == 0) { /* EOF or error */ + if (ferror(fp) == 0) { + rv = HT_LOADED; + break; + } + CTRACE((tfp, "HTFormat: Read error, read returns %d\n", + ferror(fp))); + if (bytes) { + rv = HT_PARTIAL_CONTENT; + } else { + rv = -1; + } + break; + } + + (*targetClass.put_block) (sink, input_buffer, status); + bytes += status; + HTReadProgress(bytes, (off_t) 0); + /* Suppress last screen update in partial mode - a regular update under + * control of mainloop() should follow anyway. - kw + */ +#ifdef DISP_PARTIAL + if (display_partial && bytes != HTMainAnchor->content_length) + HTDisplayPartial(); +#endif + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + if (bytes) { + rv = HT_INTERRUPTED; + } else { + rv = -1; + } + break; + } + } /* next bufferload */ + + HTFinishDisplayPartial(); + return rv; +} + +#ifdef USE_SOURCE_CACHE +/* Push data from an HTChunk down a stream + * --------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * State of memory and target stream on entry: + * HTChunk* (chunk) and target (sink) assumed valid. + * + * Return values: + * HT_LOADED All data sent. + * HT_INTERRUPTED Interruption after some data read. + * + * State of memory and target stream on return: + * always chunk unchanged, target stream still valid. 
+ */ +int HTMemCopy(HTChunk *chunk, HTStream *sink) +{ + HTStreamClass targetClass; + off_t bytes; + int rv = HT_OK; + + targetClass = *(sink->isa); + HTReadProgress(bytes = 0, (off_t) 0); + for (; chunk != NULL; chunk = chunk->next) { + + /* Push the data down the stream a piece at a time, in case we're + * running a large document on a slow machine. + */ + (*targetClass.put_block) (sink, chunk->data, chunk->size); + bytes += chunk->size; + + HTReadProgress(bytes, (off_t) 0); + HTDisplayPartial(); + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + if (bytes) { + rv = HT_INTERRUPTED; + } else { + rv = -1; + } + break; + } + } + + HTFinishDisplayPartial(); + return rv; +} +#endif + +#ifdef USE_ZLIB +/* Push data from a gzip file pointer down a stream + * ------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * + * State of file and target stream on entry: + * gzFile (gzfp) assumed open (should have gzipped content), + * target (sink) assumed valid. + * + * Return values: + * HT_INTERRUPTED Interruption after some data read. + * HT_PARTIAL_CONTENT Error after some data read. + * -1 Error before any data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always gzfp still open, target stream still valid. 
+ */ +static int HTGzFileCopy(gzFile gzfp, HTStream *sink) +{ + HTStreamClass targetClass; + int status; + off_t bytes; + int gzerrnum; + int rv = HT_OK; + + /* Push the data down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* read and inflate gzip'd file, and push binary down sink + */ + HTReadProgress(bytes = 0, (off_t) 0); + for (;;) { + status = gzread(gzfp, input_buffer, INPUT_BUFFER_SIZE); + if (status <= 0) { /* EOF or error */ + if (status == 0) { + rv = HT_LOADED; + break; + } + CTRACE((tfp, "HTGzFileCopy: Read error, gzread returns %d\n", + status)); + CTRACE((tfp, "gzerror : %s\n", + gzerror(gzfp, &gzerrnum))); + if (TRACE) { + if (gzerrnum == Z_ERRNO) + perror("gzerror "); + } + if (bytes) { + rv = HT_PARTIAL_CONTENT; + } else { + rv = -1; + } + break; + } + + (*targetClass.put_block) (sink, input_buffer, status); + bytes += status; + HTReadProgress(bytes, (off_t) -1); + HTDisplayPartial(); + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + if (bytes) { + rv = HT_INTERRUPTED; + } else { + rv = -1; + } + break; + } + } /* next bufferload */ + + HTFinishDisplayPartial(); + return rv; +} + +#ifndef HAVE_ZERROR +#define zError(s) LynxZError(s) +static const char *zError(int status) +{ + static char result[80]; + + sprintf(result, "zlib error %d", status); + return result; +} +#endif + +/* Push data from a deflate file pointer down a stream + * ------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. The code is + * loosely based on the inflate.c file from w3m. + * + * + * State of file and target stream on entry: + * FILE (zzfp) assumed open (should have deflated content), + * target (sink) assumed valid. + * + * Return values: + * HT_INTERRUPTED Interruption after some data read. + * HT_PARTIAL_CONTENT Error after some data read. + * -1 Error before any data read. 
+ * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always zzfp still open, target stream still valid. + */ +static int HTZzFileCopy(FILE *zzfp, HTStream *sink) +{ + static char dummy_head[1 + 1] = + { + 0x8 + 0x7 * 0x10, + (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, + }; + + z_stream s; + HTStreamClass targetClass; + off_t bytes; + int rv = HT_OK; + char output_buffer[INPUT_BUFFER_SIZE]; + int status; + int flush; + int retry = 0; + int len = 0; + + /* Push the data down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + s.zalloc = Z_NULL; + s.zfree = Z_NULL; + s.opaque = Z_NULL; + status = inflateInit(&s); + if (status != Z_OK) { + CTRACE((tfp, "HTZzFileCopy inflateInit() %s\n", zError(status))); + exit_immediately(EXIT_FAILURE); + } + s.avail_in = 0; + s.next_out = (Bytef *) output_buffer; + s.avail_out = sizeof(output_buffer); + flush = Z_NO_FLUSH; + + /* read and inflate deflate'd file, and push binary down sink + */ + HTReadProgress(bytes = 0, (off_t) 0); + for (;;) { + if (s.avail_in == 0) { + s.next_in = (Bytef *) input_buffer; + s.avail_in = (uInt) fread(input_buffer, + (size_t) 1, + (size_t) INPUT_BUFFER_SIZE, zzfp); + len = (int) s.avail_in; + } + status = inflate(&s, flush); + if (status == Z_STREAM_END || status == Z_BUF_ERROR) { + len = (int) sizeof(output_buffer) - (int) s.avail_out; + if (len > 0) { + (*targetClass.put_block) (sink, output_buffer, len); + bytes += len; + HTReadProgress(bytes, (off_t) -1); + HTDisplayPartial(); + } + rv = HT_LOADED; + break; + } else if (status == Z_DATA_ERROR && !retry++) { + status = inflateReset(&s); + if (status != Z_OK) { + CTRACE((tfp, "HTZzFileCopy inflateReset() %s\n", zError(status))); + rv = bytes ? 
HT_PARTIAL_CONTENT : -1; + break; + } + s.next_in = (Bytef *) dummy_head; + s.avail_in = sizeof(dummy_head); + (void) inflate(&s, flush); + s.next_in = (Bytef *) input_buffer; + s.avail_in = (unsigned) len; + continue; + } else if (status != Z_OK) { + CTRACE((tfp, "HTZzFileCopy inflate() %s\n", zError(status))); + rv = bytes ? HT_PARTIAL_CONTENT : -1; + break; + } else if (s.avail_out == 0) { + len = sizeof(output_buffer); + s.next_out = (Bytef *) output_buffer; + s.avail_out = sizeof(output_buffer); + + (*targetClass.put_block) (sink, output_buffer, len); + bytes += len; + HTReadProgress(bytes, (off_t) -1); + HTDisplayPartial(); + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + rv = bytes ? HT_INTERRUPTED : -1; + break; + } + } + retry = 1; + } /* next bufferload */ + + inflateEnd(&s); + HTFinishDisplayPartial(); + return rv; +} +#endif /* USE_ZLIB */ + +#ifdef USE_BZLIB +/* Push data from a bzip file pointer down a stream + * ------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * + * State of file and target stream on entry: + * BZFILE (bzfp) assumed open (should have bzipped content), + * target (sink) assumed valid. + * + * Return values: + * HT_INTERRUPTED Interruption after some data read. + * HT_PARTIAL_CONTENT Error after some data read. + * -1 Error before any data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always bzfp still open, target stream still valid. 
+ */ +static int HTBzFileCopy(BZFILE * bzfp, HTStream *sink) +{ + HTStreamClass targetClass; + int status; + off_t bytes; + int bzerrnum; + int rv = HT_OK; + + /* Push the data down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* read and inflate bzip'd file, and push binary down sink + */ + HTReadProgress(bytes = 0, (off_t) 0); + for (;;) { + status = BZ2_bzread(bzfp, input_buffer, INPUT_BUFFER_SIZE); + if (status <= 0) { /* EOF or error */ + if (status == 0) { + rv = HT_LOADED; + break; + } + CTRACE((tfp, "HTBzFileCopy: Read error, bzread returns %d\n", + status)); + CTRACE((tfp, "bzerror : %s\n", + BZ2_bzerror(bzfp, &bzerrnum))); + if (bytes) { + rv = HT_PARTIAL_CONTENT; + } else { + rv = -1; + } + break; + } + + (*targetClass.put_block) (sink, input_buffer, status); + bytes += status; + HTReadProgress(bytes, (off_t) -1); + HTDisplayPartial(); + + if (HTCheckForInterrupt()) { + _HTProgress(TRANSFER_INTERRUPTED); + if (bytes) { + rv = HT_INTERRUPTED; + } else { + rv = -1; + } + break; + } + } /* next bufferload */ + + HTFinishDisplayPartial(); + return rv; +} +#endif /* USE_BZLIB */ + +/* Push data from a socket down a stream STRIPPING CR + * -------------------------------------------------- + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the socket. + * + * The file number given is assumed to be a TELNET stream ie containing + * CRLF at the end of lines which need to be stripped to LF for unix + * when the format is textual. + * + */ +void HTCopyNoCR(HTParentAnchor *anchor GCC_UNUSED, + int file_number, + HTStream *sink) +{ + HTStreamClass targetClass; + int character; + + /* Push the data, ignoring CRLF, down the stream + */ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* + * Push text from telnet socket down sink + * + * @@@@@ To push strings could be faster? (especially is we cheat and + * don't ignore CR! 
:-} + */ + HTInitInput(file_number); + for (;;) { + character = HTGetCharacter(); + if (character == EOF) + break; + (*targetClass.put_character) (sink, (char) character); + } +} + +/* Parse a socket given format and file number + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * The file number given is assumed to be a TELNET stream ie containing + * CRLF at the end of lines which need to be stripped to LF for unix + * when the format is textual. + * + * State of socket and target stream on entry: + * socket (file_number) assumed open, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * HT_INTERRUPTED Interruption or error after some data received. + * -501 Stream stack failed (cannot present or convert). + * -2 Unexpected disconnect before any data received. + * -1 Stream stack failed (cannot present or convert), or + * Interruption or error before any data received, or + * (UNIX) other read error before any data received, or + * download cancelled. + * HT_LOADED Normal close of socket (end of file indication + * received), or + * unexpected disconnect after some data received, or + * other read error after some data received, or + * (not UNIX) other read error before any data received. + * + * State of socket and target stream on return depends on return value: + * HT_INTERRUPTED socket still open, target aborted. + * -501 socket still open, target stream NULL. + * -2 socket still open, target freed. + * -1 socket still open, target stream aborted or NULL. + * otherwise socket closed, target stream freed. 
+ */ +int HTParseSocket(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + int file_number, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + + if (!stream) { + char *buffer = 0; + + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat: %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); /* returns -501 */ + FREE(buffer); + } else { + /* + * Push the data, don't worry about CRLF we can strip them later. + */ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTCopy(anchor, file_number, NULL, stream); + if (rv != -1 && rv != HT_INTERRUPTED) + (*targetClass._free) (stream); + } + return rv; + /* Originally: full: HT_LOADED; partial: HT_INTERRUPTED; no bytes: -1 */ +} + +/* Parse a file given format and file pointer + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * The file number given is assumed to be a TELNET stream ie containing + * CRLF at the end of lines which need to be stripped to \n for unix + * when the format is textual. + * + * State of file and target stream on entry: + * FILE* (fp) assumed open, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * -501 Stream stack failed (cannot present or convert). + * -1 Download cancelled. + * HT_NO_DATA Error before any data read. + * HT_PARTIAL_CONTENT Interruption or error after some data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always fp still open; target freed, aborted, or NULL. 
+ */ +int HTParseFile(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + FILE *fp, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + if (fp == NULL) + return HT_LOADED; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + + if (!stream) { + char *buffer = 0; + + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat(in HTParseFile): %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); + FREE(buffer); + return rv; + } + + /* + * Push the data down the stream + * + * @@ Bug: This decision ought to be made based on "encoding" rather than + * on content-type. @@@ When we handle encoding. The current method + * smells anyway. + */ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTFileCopy(fp, stream); + if (rv == -1 || rv == HT_INTERRUPTED) { + (*targetClass._abort) (stream, NULL); + } else { + (*targetClass._free) (stream); + } + + if (rv == -1) + return HT_NO_DATA; + else if (rv == HT_INTERRUPTED || (rv > 0 && rv != HT_LOADED)) + return HT_PARTIAL_CONTENT; + else + return HT_LOADED; +} + +#ifdef USE_SOURCE_CACHE +/* Parse a document in memory given format and memory block pointer + * + * This routine is responsible for creating and PRESENTING any + * graphic (or other) objects described by the file. + * + * State of memory and target stream on entry: + * HTChunk* (chunk) assumed valid, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * -501 Stream stack failed (cannot present or convert). + * HT_LOADED All data sent. + * + * State of memory and target stream on return: + * always chunk unchanged; target freed, aborted, or NULL. 
+ */ +int HTParseMem(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + HTChunk *chunk, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + if (!stream) { + char *buffer = 0; + + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat(in HTParseMem): %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); + FREE(buffer); + return rv; + } + + /* Push the data down the stream + */ + targetClass = *(stream->isa); + (void) HTMemCopy(chunk, stream); + (*targetClass._free) (stream); + return HT_LOADED; +} +#endif + +#ifdef USE_ZLIB +static int HTCloseGzFile(gzFile gzfp) +{ + int gzres; + + if (gzfp == NULL) + return 0; + gzres = gzclose(gzfp); + if (TRACE) { + if (gzres == Z_ERRNO) { + perror("gzclose "); + } else if (gzres != Z_OK) { + CTRACE((tfp, "gzclose : error number %d\n", gzres)); + } + } + return (gzres); +} + +/* HTParseGzFile + * + * State of file and target stream on entry: + * gzFile (gzfp) assumed open, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * -501 Stream stack failed (cannot present or convert). + * -1 Download cancelled. + * HT_NO_DATA Error before any data read. + * HT_PARTIAL_CONTENT Interruption or error after some data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always gzfp closed; target freed, aborted, or NULL. 
+ */ +int HTParseGzFile(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + gzFile gzfp, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + + if (!stream) { + char *buffer = 0; + + HTCloseGzFile(gzfp); + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat(in HTParseGzFile): %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); + FREE(buffer); + return rv; + } + + /* + * Push the data down the stream + * + * @@ Bug: This decision ought to be made based on "encoding" rather than + * on content-type. @@@ When we handle encoding. The current method + * smells anyway. + */ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTGzFileCopy(gzfp, stream); + if (rv == -1 || rv == HT_INTERRUPTED) { + (*targetClass._abort) (stream, NULL); + } else { + (*targetClass._free) (stream); + } + + HTCloseGzFile(gzfp); + if (rv == -1) + return HT_NO_DATA; + else if (rv == HT_INTERRUPTED || (rv > 0 && rv != HT_LOADED)) + return HT_PARTIAL_CONTENT; + else + return HT_LOADED; +} + +/* HTParseZzFile + * + * State of file and target stream on entry: + * FILE (zzfp) assumed open, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * -501 Stream stack failed (cannot present or convert). + * -1 Download cancelled. + * HT_NO_DATA Error before any data read. + * HT_PARTIAL_CONTENT Interruption or error after some data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always zzfp closed; target freed, aborted, or NULL. 
+ */ +int HTParseZzFile(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + FILE *zzfp, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + + if (!stream) { + char *buffer = 0; + + fclose(zzfp); + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat(in HTParseGzFile): %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); + FREE(buffer); + return rv; + } + + /* + * Push the data down the stream + * + * @@ Bug: This decision ought to be made based on "encoding" rather than + * on content-type. @@@ When we handle encoding. The current method + * smells anyway. + */ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTZzFileCopy(zzfp, stream); + if (rv == -1 || rv == HT_INTERRUPTED) { + (*targetClass._abort) (stream, NULL); + } else { + (*targetClass._free) (stream); + } + + fclose(zzfp); + if (rv == -1) + return HT_NO_DATA; + else if (rv == HT_INTERRUPTED || (rv > 0 && rv != HT_LOADED)) + return HT_PARTIAL_CONTENT; + else + return HT_LOADED; +} +#endif /* USE_ZLIB */ + +#ifdef USE_BZLIB +static void HTCloseBzFile(BZFILE * bzfp) +{ + if (bzfp) + BZ2_bzclose(bzfp); +} + +/* HTParseBzFile + * + * State of file and target stream on entry: + * bzFile (bzfp) assumed open, + * target (sink) usually NULL (will call stream stack). + * + * Return values: + * -501 Stream stack failed (cannot present or convert). + * -1 Download cancelled. + * HT_NO_DATA Error before any data read. + * HT_PARTIAL_CONTENT Interruption or error after some data read. + * HT_LOADED Normal end of file indication on reading. + * + * State of file and target stream on return: + * always bzfp closed; target freed, aborted, or NULL. 
+ */ +int HTParseBzFile(HTFormat rep_in, + HTFormat format_out, + HTParentAnchor *anchor, + BZFILE * bzfp, + HTStream *sink) +{ + HTStream *stream; + HTStreamClass targetClass; + int rv; + + stream = HTStreamStack(rep_in, format_out, sink, anchor); + + if (!stream) { + char *buffer = 0; + + HTCloseBzFile(bzfp); + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(rep_in), HTAtom_name(format_out)); + CTRACE((tfp, "HTFormat(in HTParseBzFile): %s\n", buffer)); + rv = HTLoadError(sink, 501, buffer); + FREE(buffer); + return rv; + } + + /* + * Push the data down the stream + * + * @@ Bug: This decision ought to be made based on "encoding" rather than + * on content-type. @@@ When we handle encoding. The current method + * smells anyway. + */ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTBzFileCopy(bzfp, stream); + if (rv == -1 || rv == HT_INTERRUPTED) { + (*targetClass._abort) (stream, NULL); + } else { + (*targetClass._free) (stream); + } + + HTCloseBzFile(bzfp); + if (rv == -1) + return HT_NO_DATA; + else if (rv == HT_INTERRUPTED || (rv > 0 && rv != HT_LOADED)) + return HT_PARTIAL_CONTENT; + else + return HT_LOADED; +} +#endif /* USE_BZLIB */ + +/* Converter stream: Network Telnet to internal character text + * ----------------------------------------------------------- + * + * The input is assumed to be in ASCII, with lines delimited + * by (13,10) pairs, These pairs are converted into (CR,LF) + * pairs in the local representation. The (CR,LF) sequence + * when found is changed to a '\n' character, the internal + * C representation of a new line. 
+ */ + +static void NetToText_put_character(HTStream *me, int net_char) +{ + char c = (char) FROMASCII(net_char); + + if (me->had_cr) { + if (c == LF) { + me->sink->isa->put_character(me->sink, '\n'); /* Newline */ + me->had_cr = NO; + return; + } else { + me->sink->isa->put_character(me->sink, CR); /* leftover */ + } + } + me->had_cr = (BOOL) (c == CR); + if (!me->had_cr) + me->sink->isa->put_character(me->sink, c); /* normal */ +} + +static void NetToText_put_string(HTStream *me, const char *s) +{ + const char *p; + + for (p = s; *p; p++) + NetToText_put_character(me, *p); +} + +static void NetToText_put_block(HTStream *me, const char *s, int l) +{ + const char *p; + + for (p = s; p < (s + l); p++) + NetToText_put_character(me, *p); +} + +static void NetToText_free(HTStream *me) +{ + (me->sink->isa->_free) (me->sink); /* Close rest of pipe */ + FREE(me); +} + +static void NetToText_abort(HTStream *me, HTError e) +{ + me->sink->isa->_abort(me->sink, e); /* Abort rest of pipe */ + FREE(me); +} + +/* The class structure +*/ +static HTStreamClass NetToTextClass = +{ + "NetToText", + NetToText_free, + NetToText_abort, + NetToText_put_character, + NetToText_put_string, + NetToText_put_block +}; + +/* The creation method +*/ +HTStream *HTNetToText(HTStream *sink) +{ + HTStream *me = typecalloc(HTStream); + + if (me == NULL) + outofmem(__FILE__, "NetToText"); + + assert(me != NULL); + + me->isa = &NetToTextClass; + + me->had_cr = NO; + me->sink = sink; + return me; +} + +static HTStream HTBaseStreamInstance; /* Made static */ + +/* + * ERROR STREAM + * ------------ + * There is only one error stream shared by anyone who wants a + * generic error returned from all stream methods. 
+ */ +/* Any character written to the error stream sets LYCancelDownload. */ +static void HTErrorStream_put_character(HTStream *me GCC_UNUSED, int c GCC_UNUSED) +{ + LYCancelDownload = TRUE; +} + +/* A non-NULL, non-empty string written to the error stream sets LYCancelDownload. */ +static void HTErrorStream_put_string(HTStream *me GCC_UNUSED, const char *s) +{ + if (s && *s) + LYCancelDownload = TRUE; +} + +/* A non-NULL block with non-zero length written to the error stream sets LYCancelDownload. */ +static void HTErrorStream_write(HTStream *me GCC_UNUSED, const char *s, int l) +{ + if (l && s) + LYCancelDownload = TRUE; +} + +/* Does nothing: HTErrorStream() hands out the address of HTBaseStreamInstance, not a fresh allocation. */ +static void HTErrorStream_free(HTStream *me GCC_UNUSED) +{ + return; +} + +/* Does nothing, for the same reason as HTErrorStream_free(). */ +static void HTErrorStream_abort(HTStream *me GCC_UNUSED, HTError e GCC_UNUSED) +{ + return; +} + +/* Method table of the error stream, in the same order as the other stream classes in this file. */ +static const HTStreamClass HTErrorStreamClass = +{ + "ErrorStream", + HTErrorStream_free, + HTErrorStream_abort, + HTErrorStream_put_character, + HTErrorStream_put_string, + HTErrorStream_write +}; + +/* Return the shared error stream: points HTBaseStreamInstance at HTErrorStreamClass and returns its address. */ +HTStream *HTErrorStream(void) +{ + CTRACE((tfp, "ErrorStream. Created\n")); + HTBaseStreamInstance.isa = &HTErrorStreamClass; /* only isa is set here; the other fields are left as they are */ + return &HTBaseStreamInstance; +} diff --git a/WWW/Library/Implementation/HTFormat.h b/WWW/Library/Implementation/HTFormat.h new file mode 100644 index 00000000..1e82fd76 --- /dev/null +++ b/WWW/Library/Implementation/HTFormat.h @@ -0,0 +1,548 @@ +/* + * $LynxId: HTFormat.h,v 1.33 2010/10/27 00:13:53 tom Exp $ + * + * HTFormat: The format manager in the WWW Library + * MANAGE DIFFERENT DOCUMENT FORMATS + * + * Here we describe the functions of the HTFormat module which handles conversion between + * different data representations. (In MIME parlance, a representation is known as a + * content-type. In WWW the term "format" is often used as it is shorter). + * + * This module is implemented by HTFormat.c. This hypertext document is used to generate + * the HTFormat.h include file. Part of the WWW library. 
+ */ +#ifndef HTFORMAT_H +#define HTFORMAT_H + +#include <HTStream.h> +#include <HTAtom.h> +#include <HTList.h> +#include <HTAnchor.h> + +#ifdef USE_SOURCE_CACHE +#include <HTChunk.h> +#endif + +#ifdef USE_BZLIB +#include <bzlib.h> +#endif + +#ifdef USE_ZLIB +#include <zlib.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + + These macros (which used to be constants) define some basic internally + referenced representations. The www/xxx ones are of course not MIME + standard. + + www/source is an output format which leaves the input untouched. It is + useful for diagnostics, and for users who want to see the original, whatever + it is. + + */ +/* Internal ones */ +/* #define WWW_SOURCE HTAtom_for("www/source") */ +/* Whatever it was originally */ + extern HTAtom *WWW_SOURCE; + /* calculated once, heavily used */ + +/* + + www/present represents the user's perception of the document. If you + convert to www/present, you present the material to the user. + + */ +#define WWW_PRESENT HTAtom_for("www/present") /* The user's perception */ + +#define WWW_DEBUG HTAtom_for("www/debug") +/* + + WWW_DEBUG represents the user's perception of debug information, for example + sent as an HTML document in an HTTP redirection message. + + */ + +/* + + The message/rfc822 format means a MIME message or a plain text message with + no MIME header. This is what is returned by an HTTP server. + + */ +#define WWW_MIME HTAtom_for("www/mime") /* A MIME message */ + +/* + For parsing only the header. - kw + */ +#define WWW_MIME_HEAD HTAtom_for("message/x-rfc822-head") + +/* + + www/print is like www/present except it represents a printed copy. + + */ +#define WWW_PRINT HTAtom_for("www/print") /* A printed copy */ + +/* + + www/unknown is a really unknown type. Some default action is appropriate. + + */ +#define WWW_UNKNOWN HTAtom_for("www/unknown") + +#ifdef DIRED_SUPPORT +/* + www/dired signals directory edit mode. 
+*/ +#define WWW_DIRED HTAtom_for("www/dired") +#endif + +/* + + These are regular MIME types. HTML is assumed to be added by the W3 code. + application/octet-stream was mistakenly application/binary in earlier libwww + versions (pre 2.11). + + */ +#define WWW_PLAINTEXT HTAtom_for("text/plain") +#define WWW_POSTSCRIPT HTAtom_for("application/postscript") +#define WWW_RICHTEXT HTAtom_for("application/rtf") +#define WWW_AUDIO HTAtom_for("audio/basic") +#define WWW_HTML HTAtom_for("text/html") +#define WWW_BINARY HTAtom_for("application/octet-stream") + + typedef HTAtom *HTEncoding; + +/* + * The following are values for the MIME types: + */ +#define WWW_ENC_7BIT HTAtom_for("7bit") +#define WWW_ENC_8BIT HTAtom_for("8bit") +#define WWW_ENC_BINARY HTAtom_for("binary") + +/* + * We also add + */ +#define WWW_ENC_COMPRESS HTAtom_for("compress") + +/* + * Does a string designate a real encoding, or is it just + * a "dummy" as for example 7bit, 8bit, and binary? + */ +#define IsUnityEncStr(senc) \ + ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\ + !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit")) + +/* + * The same question asked of an atomized encoding: true for NULL and for + * the atoms of "identity", "8bit", "binary" and "7bit". + */ +#define IsUnityEnc(enc) \ + ((enc)==NULL || (enc)==HTAtom_for("identity") ||\ + (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT) + +/* + +The HTPresentation and HTConverter types + + This HTPresentation structure represents a possible conversion algorithm + from one format to another. It includes a pointer to a conversion routine. + The conversion routine returns a stream to which data should be fed. See + also HTStreamStack which scans the list of registered converters and calls + one. See the initialisation module for a list of conversion routines. 
+ + */ + typedef struct _HTPresentation HTPresentation; + + typedef HTStream *HTConverter (HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + + struct _HTPresentation { + HTAtom *rep; /* representation name atomized */ + HTAtom *rep_out; /* resulting representation */ + HTConverter *converter; /* routine to gen the stream stack */ + char *command; /* MIME-format command string */ + char *testcommand; /* MIME-format test string */ + float quality; /* Between 0 (bad) and 1 (good) */ + float secs; /* presumably the time limit given to HTSetPresentation - confirm */ + float secs_per_byte; + off_t maxbytes; /* presumably the input-length limit given to HTSetPresentation - confirm */ + BOOL get_accept; /* list in "Accept:" for GET */ + int accept_opt; /* matches against LYAcceptMedia */ + }; + +/* + + The list of presentations is kept by this module. It is also scanned by + modules which want to know the set of formats supported, for example. + + */ + extern HTList *HTPresentations; + +/* + + The default presentation is used when no other is appropriate + + */ + extern HTPresentation *default_presentation; + +/* + * Options used for "Accept:" string + */ + typedef enum { + /* make the components powers of two so we can add them */ + mediaINT = 1 /* internal types predefined in HTInit.c */ + ,mediaEXT = 2 /* external types predefined in HTInit.c */ + ,mediaCFG = 4 /* types, e.g., viewers, from lynx.cfg */ + ,mediaUSR = 8 /* user's mime-types, etc. */ + ,mediaSYS = 16 /* system's mime-types, etc. 
*/ + /* these are useful flavors for the options menu */ + ,mediaOpt1 = mediaINT + ,mediaOpt2 = mediaINT + mediaCFG + ,mediaOpt3 = mediaINT + mediaCFG + mediaUSR + ,mediaOpt4 = mediaINT + mediaCFG + mediaUSR + mediaSYS + /* this is the flavor from pre-2.8.6 */ + ,mediaALL = mediaINT + mediaEXT + mediaCFG + mediaUSR + mediaSYS + } AcceptMedia; + +/* + * Options used for "Accept-Encoding:" string + */ + typedef enum { + encodingNONE = 0 + ,encodingGZIP = 1 + ,encodingDEFLATE = 2 + ,encodingCOMPRESS = 4 + ,encodingBZIP2 = 8 + ,encodingALL = (encodingGZIP + + encodingDEFLATE + + encodingCOMPRESS + + encodingBZIP2) + } AcceptEncoding; + +/* + +HTSetPresentation: Register a system command to present a format + + ON ENTRY, + + rep is the MIME - style format name + + command is the MAILCAP - style command template + + testcommand is the MAILCAP - style testcommand template + + quality A degradation factor 0..1.0 + + secs A limit on the time user will wait (0.0 for infinity) + secs_per_byte + + maxbytes A limit on the length acceptable as input (0 infinite) + + media Used in filtering presentation types for "Accept:" + + */ + extern void HTSetPresentation(const char *representation, + const char *command, + const char *testcommand, + double quality, + double secs, + double secs_per_byte, + long int maxbytes, + AcceptMedia media + ); + +/* + +HTSetConversion: Register a conversion routine + + ON ENTRY, + + rep_in is the content-type input + + rep_out is the resulting content-type + + converter is the routine to make the stream to do it + + */ + + extern void HTSetConversion(const char *rep_in, + const char *rep_out, + HTConverter *converter, + double quality, + double secs, + double secs_per_byte, + long int maxbytes, + AcceptMedia media + ); + +/* + +HTStreamStack: Create a stack of streams + + This is the routine which actually sets up the conversion. It currently + checks only for direct conversions, but multi-stage conversions are foreseen. 
+ It takes a stream into which the output should be sent in the final format, + builds the conversion stack, and returns a stream into which the data in the + input format should be fed. The anchor is passed because hypertext objects + load information into the anchor object which represents them. + + */ + extern HTStream *HTStreamStack(HTFormat format_in, + HTFormat format_out, + HTStream *stream_out, + HTParentAnchor *anchor); + +/* +HTReorderPresentation: put presentation near head of list + + Look up a presentation (exact match only) and, if found, reorder it to the + start of the HTPresentations list. - kw + */ + + extern void HTReorderPresentation(HTFormat format_in, + HTFormat format_out); + +/* + * Setup 'get_accept' flag to denote presentations that are not redundant, + * and will be listed in "Accept:" header. + */ + extern void HTFilterPresentations(void); + +/* + +HTStackValue: Find the cost of a filter stack + + Must return the cost of the same stack which HTStreamStack would set up. + + ON ENTRY, + + format_in The format of the data to be converted + + format_out The format required + + initial_value The intrinsic "value" of the data before conversion on a scale + from 0 to 1 + + length The number of bytes expected in the input format + + */ + extern float HTStackValue(HTFormat format_in, + HTFormat rep_out, + double initial_value, + long int length); + +#define NO_VALUE_FOUND -1e20 /* returned if none found */ + +/* Display the page while transfer in progress + * ------------------------------------------- + * + * Repaint the page only when necessary. + * This is a traverse call for HText_pageDisplay() - it works! + * + */ + extern void HTDisplayPartial(void); + + extern void HTFinishDisplayPartial(void); + +/* + +HTCopy: Copy a socket to a stream + + This is used by the protocol engines to send data down a stream, typically + one which has been generated by HTStreamStack. 
+ + */ + extern int HTCopy(HTParentAnchor *anchor, + int file_number, + void *handle, + HTStream *sink); + +/* + +HTFileCopy: Copy a file to a stream + + This is used by the protocol engines to send data down a stream, typically + one which has been generated by HTStreamStack. It is currently called by + HTParseFile + + */ + extern int HTFileCopy(FILE *fp, + HTStream *sink); + +#ifdef USE_SOURCE_CACHE +/* + +HTMemCopy: Copy a memory chunk to a stream + + This is used by the protocol engines to send data down a stream, typically + one which has been generated by HTStreamStack. It is currently called by + HTParseMem + + */ + extern int HTMemCopy(HTChunk *chunk, + HTStream *sink); +#endif + +/* + +HTCopyNoCR: Copy a socket to a stream, stripping CR characters. + + It is slower than HTCopy. + + */ + + extern void HTCopyNoCR(HTParentAnchor *anchor, + int file_number, + HTStream *sink); + +/* + +Clear input buffer and set file number + + This routine and the one below provide simple character input from sockets. + (They are left over from the older architecture and may not be used very + much.) The existence of a common routine and buffer saves memory space in + small implementations. + + */ + extern void HTInitInput(int file_number); + +/* + +Get next character from buffer + + */ + extern int interrupted_in_htgetcharacter; + extern int HTGetCharacter(void); + +/* + +HTParseSocket: Parse a socket given its format + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and the copy routines above. Returns HT_LOADED if successful, + <0 if not. + + */ + extern int HTParseSocket(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + int file_number, + HTStream *sink); + +/* + +HTParseFile: Parse a File through a file pointer + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and HTFileCopy. 
Returns HT_LOADED if successful, can also + return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. + + */ + extern int HTParseFile(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + FILE *fp, + HTStream *sink); + +#ifdef USE_SOURCE_CACHE +/* + +HTParseMem: Parse a document in memory + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and HTMemCopy. Returns HT_LOADED if successful, can also + return <0 for failure. + + */ + extern int HTParseMem(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + HTChunk *chunk, + HTStream *sink); +#endif + +#ifdef USE_ZLIB +/* +HTParseGzFile: Parse a gzip'ed File through a file pointer + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also + return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. + */ + extern int HTParseGzFile(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + gzFile gzfp, + HTStream *sink); + +/* +HTParseZzFile: Parse a deflate'd File through a file pointer + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and HTZzFileCopy. Returns HT_LOADED if successful, can also + return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. + */ + extern int HTParseZzFile(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + FILE *zzfp, + HTStream *sink); + +#endif /* USE_ZLIB */ + +#ifdef USE_BZLIB +/* +HTParseBzFile: Parse a bzip2'ed File through a file pointer + + This routine is called by protocol modules to load an object. Uses + HTStreamStack and HTBzFileCopy. Returns HT_LOADED if successful, can also + return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 
+ */ + extern int HTParseBzFile(HTFormat format_in, + HTFormat format_out, + HTParentAnchor *anchor, + BZFILE * bzfp, + HTStream *sink); + +#endif /* USE_BZLIB */ + +/* + +HTNetToText: Convert Net ASCII to local representation + + This is a filter stream suitable for taking text from a socket and passing + it into a stream which expects text in the local C representation. It does + ASCII and newline conversion. As usual, pass its output stream to it when + creating it. + + */ + extern HTStream *HTNetToText(HTStream *sink); + +/* + +HTFormatInit: Set up default presentations and conversions + + These are defined in HTInit.c or HTSInit.c if these have been replaced. If + you don't call this routine, and you don't define any presentations, then + this routine will automatically be called the first time a conversion is + needed. However, if you explicitly add some conversions (e.g., using + HTLoadRules) then you may want also to explicitly call this to get the + defaults as well. + + */ + extern void HTFormatInit(void); + +/* + +Epilogue + + */ + extern BOOL HTOutputSource; /* Flag: shortcut parser */ + +#ifdef __cplusplus +} +#endif +#endif /* HTFORMAT_H */ diff --git a/WWW/Library/Implementation/HTGopher.c b/WWW/Library/Implementation/HTGopher.c new file mode 100644 index 00000000..885943e6 --- /dev/null +++ b/WWW/Library/Implementation/HTGopher.c @@ -0,0 +1,1973 @@ +/* + * $LynxId: HTGopher.c,v 1.55 2011/06/11 13:06:08 tom Exp $ + * + * GOPHER ACCESS HTGopher.c + * ============= + * + * History: + * 26 Sep 90 Adapted from other accesses (News, HTTP) TBL + * 29 Nov 91 Downgraded to C, for portable implementation. + * 10 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). Added a + * form-based CSO/PH gateway. Can be invoked via a + * "cso://host[:port]/" or "gopher://host:105/2" + * URL. If a gopher URL is used with a query token + * ('?'), the old ISINDEX procedure will be used + * instead of the form-based gateway. + * 15 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). 
Pass + * port 79, gtype 0 gopher URLs to the finger + * gateway. + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> /* Coding convention macros */ + +#ifndef DISABLE_GOPHER +#include <HTAlert.h> +#include <HTParse.h> +#include <HTTCP.h> +#include <HTFinger.h> + +/* + * Implements. + */ +#include <HTGopher.h> + +#define GOPHER_PORT 70 /* See protocol spec */ +#define CSO_PORT 105 /* See protocol spec */ +#define BIG 1024 /* Bug: fixed size of the line buffers in parse_menu() and parse_cso(); longer lines keep overwriting the last byte */ +#define LINE_LENGTH 256 /* Bug */ + +/* + * Gopher entity types. + */ +#define GOPHER_TEXT '0' +#define GOPHER_MENU '1' +#define GOPHER_CSO '2' +#define GOPHER_ERROR '3' +#define GOPHER_MACBINHEX '4' +#define GOPHER_PCBINARY '5' +#define GOPHER_UUENCODED '6' +#define GOPHER_INDEX '7' +#define GOPHER_TELNET '8' +#define GOPHER_BINARY '9' +#define GOPHER_GIF 'g' +#define GOPHER_HTML 'h' /* HTML */ +#define GOPHER_CHTML 'H' /* HTML */ +#define GOPHER_SOUND 's' +#define GOPHER_WWW 'w' /* W3 address */ +#define GOPHER_IMAGE 'I' +#define GOPHER_TN3270 'T' +#define GOPHER_INFO 'i' +#define GOPHER_DUPLICATE '+' +#define GOPHER_PLUS_IMAGE ':' /* Addition from Gopher Plus */ +#define GOPHER_PLUS_MOVIE ';' +#define GOPHER_PLUS_SOUND '<' +#define GOPHER_PLUS_PDF 'P' + +#include <HTFormat.h> + +/* + * Hypertext object building machinery. + */ +#include <HTML.h> + +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYLeaks.h> + +/* + * Shorthands that dispatch through the module-wide target/targetClass pair + * declared below. + */ +#define PUTC(c) (*targetClass.put_character)(target, c) +#define PUTS(s) (*targetClass.put_string)(target, s) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) +#define END(e) (*targetClass.end_element)(target, e, 0) +#define FREE_TARGET (*targetClass._free)(target) + +/* Fetch the next input character via HTGetCharacter(). */ +#define NEXT_CHAR HTGetCharacter() + +/* + * Module-wide variables. + */ +static int s; /* Socket for gopher or CSO host */ + +struct _HTStructured { + const HTStructuredClass *isa; /* For gopher streams */ + /* ... 
*/ +}; + +static HTStructured *target; /* the new gopher hypertext */ +static HTStructuredClass targetClass; /* Its action routines */ + +struct _HTStream { + HTStreamClass *isa; /* For form-based CSO gateway - FM */ +}; + +typedef struct _CSOfield_info { /* For form-based CSO gateway - FM */ + struct _CSOfield_info *next; + char *name; + char *attributes; + char *description; + int id; + int lookup; + int indexed; + int url; + int max_size; + int defreturn; + int explicit_return; + int reserved; + int gpublic; + char name_buf[16]; /* Avoid malloc if we can */ + char desc_buf[32]; /* Avoid malloc if we can */ + char attr_buf[80]; /* Avoid malloc if we can */ +} CSOfield_info; + +static CSOfield_info *CSOfields = NULL; /* For form-based CSO gateway - FM */ + +typedef struct _CSOformgen_context { /* For form-based CSO gateway - FM */ + const char *host; + const char *seek; + CSOfield_info *fld; + int port; + int cur_line; + int cur_off; + int rep_line; + int rep_off; + int public_override; + int field_select; +} CSOformgen_context; + +/* Matrix of allowed characters in filenames + * ========================================= + */ +static BOOL acceptable[256]; +static BOOL acceptable_inited = NO; + +static void init_acceptable(void) +{ + unsigned int i; + const char *good = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$"; + + for (i = 0; i < 256; i++) + acceptable[i] = NO; + for (; *good; good++) + acceptable[(unsigned int) *good] = YES; + acceptable_inited = YES; +} + +/* Decode one hex character + * ======================== + */ +static const char hex[17] = "0123456789abcdef"; + +static char from_hex(int c) +{ + return (char) ((c >= '0') && (c <= '9') ? c - '0' + : (c >= 'A') && (c <= 'F') ? c - 'A' + 10 + : (c >= 'a') && (c <= 'f') ? c - 'a' + 10 + : 0); +} + +/* Paste in an Anchor + * ================== + * + * The title of the destination is set, as there is no way + * of knowing what the title is when we arrive. 
+ * + * On entry, + * HT is in append mode. + * text points to the text to be put into the file, 0 terminated. + * addr points to the hypertext refernce address 0 terminated. + */ +BOOLEAN HT_Is_Gopher_URL = FALSE; + +static void write_anchor(const char *text, const char *addr) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = 0; + present[HTML_A_HREF] = YES; + ((const char **) value)[HTML_A_HREF] = addr; + present[HTML_A_TITLE] = YES; + ((const char **) value)[HTML_A_TITLE] = text; + + CTRACE((tfp, "HTGopher: adding URL: %s\n", addr)); + + HT_Is_Gopher_URL = TRUE; /* tell HTML.c that this is a Gopher URL */ + (*targetClass.start_element) (target, HTML_A, present, + (const char **) value, -1, 0); + + PUTS(text); + END(HTML_A); +} + +/* Parse a Gopher Menu document + * ============================ + */ +static void parse_menu(const char *arg GCC_UNUSED, + HTParentAnchor *anAnchor) +{ + char gtype; + int ich; + char line[BIG]; + char *name = NULL, *selector = NULL; /* Gopher menu fields */ + char *host = NULL; + char *port; + char *p = line; + const char *title; + int bytes = 0; + int BytesReported = 0; + char buffer[128]; + +#define TAB '\t' +#define HEX_ESCAPE '%' + + START(HTML_HTML); + PUTC('\n'); + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else + PUTS(GOPHER_MENU_TITLE); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + + START(HTML_BODY); + PUTC('\n'); + START(HTML_H1); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else + PUTS(GOPHER_MENU_TITLE); + END(HTML_H1); + PUTC('\n'); + START(HTML_PRE); + PUTC('\n'); /* newline after HTML_PRE forces split-line */ + while ((ich = NEXT_CHAR) != EOF) { + + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTGopher: Interrupted in HTGetCharacter, apparently.\n")); + goto end_html; + } + + if ((char) ich != LF) { + *p = 
(char) ich; /* Put character in line */ + if (p < &line[BIG - 1]) + p++; + + } else { + *p++ = '\0'; /* Terminate line */ + bytes += (int) (p - line); /* add size */ + p = line; /* Scan it to parse it */ + port = 0; /* Flag "not parsed" */ + CTRACE((tfp, "HTGopher: Menu item: %s\n", line)); + gtype = *p++; + + if (bytes > BytesReported + 1024) { + sprintf(buffer, TRANSFERRED_X_BYTES, bytes); + HTProgress(buffer); + BytesReported = bytes; + } + + /* Break on line with a dot by itself */ + if ((gtype == '.') && ((*p == '\r') || (*p == 0))) + break; + + if (gtype && *p) { + name = p; + selector = strchr(name, TAB); + if (selector) { + *selector++ = '\0'; /* Terminate name */ + /* + * Gopher+ Type=0+ objects can be binary, and will + * have 9 or 5 beginning their selector. Make sure + * we don't trash the terminal by treating them as + * text. - FM + */ + if (gtype == GOPHER_TEXT && (*selector == GOPHER_BINARY || + *selector == GOPHER_PCBINARY)) + gtype = *selector; + host = strchr(selector, TAB); + if (host) { + *host++ = '\0'; /* Terminate selector */ + port = strchr(host, TAB); + if (port) { + char *junk; + + port[0] = ':'; /* delimit host a la W3 */ + junk = strchr(port, TAB); + if (junk) + *junk = '\0'; /* Chop port */ + if ((port[1] == '0') && (!port[2])) + port[0] = '\0'; /* 0 means none */ + } /* no port */ + } /* host ok */ + } /* selector ok */ + } + /* gtype and name ok */ + /* Nameless files are a separator line */ + if (name != NULL && gtype == GOPHER_TEXT) { + int i = (int) strlen(name) - 1; + + while (name[i] == ' ' && i >= 0) + name[i--] = '\0'; + if (i < 0) + gtype = GOPHER_INFO; + } + + if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */ + PUTS("(HTML) "); + write_anchor(name, selector); + + } else if (gtype == GOPHER_INFO) { + /* Information or separator line */ + PUTS(" "); + PUTS(name); + + } else if (port) { /* Other types need port */ + char *address = 0; + const char *format = *selector ? 
"%s//%s@%s/" : "%s//%s/"; + + if (gtype == GOPHER_TELNET) { + PUTS(" (TEL) "); + HTSprintf0(&address, format, STR_TELNET_URL, selector, host); + } else if (gtype == GOPHER_TN3270) { + PUTS("(3270) "); + HTSprintf0(&address, format, STR_TN3270_URL, selector, host); + } else { /* If parsed ok */ + char *r; + + switch (gtype) { + case GOPHER_TEXT: + PUTS("(FILE) "); + break; + case GOPHER_MENU: + PUTS(" (DIR) "); + break; + case GOPHER_CSO: + PUTS(" (CSO) "); + break; + case GOPHER_PCBINARY: + PUTS(" (BIN) "); + break; + case GOPHER_UUENCODED: + PUTS(" (UUE) "); + break; + case GOPHER_INDEX: + PUTS(" (?) "); + break; + case GOPHER_BINARY: + PUTS(" (BIN) "); + break; + case GOPHER_GIF: + case GOPHER_IMAGE: + case GOPHER_PLUS_IMAGE: + PUTS(" (IMG) "); + break; + case GOPHER_SOUND: + case GOPHER_PLUS_SOUND: + PUTS(" (SND) "); + break; + case GOPHER_MACBINHEX: + PUTS(" (HQX) "); + break; + case GOPHER_HTML: + case GOPHER_CHTML: + PUTS("(HTML) "); + break; + case 'm': + PUTS("(MIME) "); + break; + case GOPHER_PLUS_MOVIE: + PUTS(" (MOV) "); + break; + case GOPHER_PLUS_PDF: + PUTS(" (PDF) "); + break; + default: + PUTS("(UNKN) "); + break; + } + + HTSprintf0(&address, "//%s/%c", host, gtype); + + for (r = selector; *r; r++) { /* Encode selector string */ + if (acceptable[UCH(*r)]) { + HTSprintf(&address, "%c", *r); + } else { + HTSprintf(&address, "%c%c%c", + HEX_ESCAPE, /* Means hex coming */ + hex[(TOASCII(*r)) >> 4], + hex[(TOASCII(*r)) & 15]); + } + } + } + /* Error response from Gopher doesn't deserve to + be a hyperlink. 
*/ + if (strcmp(address, "gopher://error.host:1/0")) + write_anchor(name, address); + else + PUTS(name); + FREE(address); + } else { /* parse error */ + CTRACE((tfp, "HTGopher: Bad menu item.\n")); + PUTS(line); + + } /* parse error */ + + PUTC('\n'); + p = line; /* Start again at beginning of line */ + + } /* if end of line */ + + } /* Loop over characters */ + + end_html: + END(HTML_PRE); + PUTC('\n'); + END(HTML_BODY); + PUTC('\n'); + END(HTML_HTML); + PUTC('\n'); + FREE_TARGET; + + return; +} + +/* Parse a Gopher CSO document from an ISINDEX query. + * ================================================== + * + * Accepts an open socket to a CSO server waiting to send us + * data and puts it on the screen in a reasonable manner. + * + * Perhaps this data can be automatically linked to some + * other source as well??? + * + * Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu + * on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret, + * secret@dxcern.cern.ch . + */ +static void parse_cso(const char *arg, + HTParentAnchor *anAnchor) +{ + int ich; + char line[BIG]; + char *p = line; + char *first_colon, *second_colon, last_char = '\0'; + const char *title; + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else + PUTS(GOPHER_CSO_SEARCH_RESULTS); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_H1); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else { + PUTS(arg); + PUTS(GOPHER_SEARCH_RESULTS); + } + END(HTML_H1); + PUTC('\n'); + START(HTML_PRE); + + /* + * Start grabbing chars from the network. + */ + while ((ich = NEXT_CHAR) != EOF) { + if ((char) ich != LF) { + *p = (char) ich; /* Put character in line */ + if (p < &line[BIG - 1]) + p++; + } else { + *p = '\0'; /* Terminate line */ + p = line; /* Scan it to parse it */ + /* + * OK we now have a line in 'p'. Lets parse it and print it. + */ + + /* + * Break on line that begins with a 2. 
It's the end of data. + */ + if (*p == '2') + break; + + /* + * Lines beginning with 5 are errors. Print them and quit. + */ + if (*p == '5') { + START(HTML_H2); + PUTS(p + 4); + END(HTML_H2); + break; + } + + if (*p == '-') { + /* + * Data lines look like -200:#: + * where # is the search result number and can be multiple + * digits (infinite?). + * Find the second colon and check the digit to the left of it + * to see if they are diferent. If they are then a different + * person is starting. Make this line an <h2>. + */ + + /* + * Find the second_colon. + */ + second_colon = NULL; + first_colon = strchr(p, ':'); + if (first_colon != NULL) { + second_colon = strchr(first_colon + 1, ':'); + } + + if (second_colon != NULL) { /* error check */ + + if (*(second_colon - 1) != last_char) + /* print seperator */ + { + END(HTML_PRE); + START(HTML_H2); + } + + /* + * Right now the record appears with the alias (first line) + * as the header and the rest as <pre> text. + * + * It might look better with the name as the header and the + * rest as a <ul> with <li> tags. I'm not sure whether the + * name field comes in any special order or if its even + * required in a record, so for now the first line is the + * header no matter what it is (it's almost always the + * alias). + * + * A <dl> with the first line as the <DT> and the rest as + * some form of <DD> might good also? + */ + + /* + * Print data. + */ + PUTS(second_colon + 1); + PUTC('\n'); + + if (*(second_colon - 1) != last_char) + /* end seperator */ + { + END(HTML_H2); + START(HTML_PRE); + } + + /* + * Save the char before the second colon for comparison on + * the next pass. + */ + last_char = *(second_colon - 1); + + } /* end if second_colon */ + } /* end if *p == '-' */ + } /* if end of line */ + + } /* Loop over characters */ + + /* end the text block */ + PUTC('\n'); + END(HTML_PRE); + PUTC('\n'); + FREE_TARGET; + + return; /* all done */ +} /* end of procedure */ + +/* Display a Gopher CSO ISINDEX cover page. 
+ * ======================================== + */ +static void display_cso(const char *arg, + HTParentAnchor *anAnchor) +{ + const char *title; + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else + PUTS(GOPHER_CSO_INDEX); + END(HTML_TITLE); + PUTC('\n'); + START(HTML_ISINDEX); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_H1); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else { + PUTS(arg); + PUTS(INDEX_SEGMENT); + } + END(HTML_H1); + PUTS(GOPHER_CSO_INDEX_SUBTITLE); + START(HTML_P); + PUTS(GOPHER_CSO_SOLICIT_KEYWORDS); + START(HTML_P); + PUTS(SEGMENT_KEYWORDS_WILL); + PUTS(SEGMENT_PERSONS_DB_NAME); + + if (!HTAnchor_title(anAnchor)) + HTAnchor_setTitle(anAnchor, arg); + + FREE_TARGET; + return; +} + +/* Display a Gopher Index document. + * ================================ + */ +static void display_index(const char *arg, + HTParentAnchor *anAnchor) +{ + const char *title; + + START(HTML_HEAD); + PUTC('\n'); + PUTC('\n'); + START(HTML_TITLE); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else + PUTS(GOPHER_INDEX_TITLE); + END(HTML_TITLE); + PUTC('\n'); + START(HTML_ISINDEX); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_H1); + if ((title = HTAnchor_title(anAnchor))) + PUTS(title); + else { + PUTS(arg); + PUTS(INDEX_SEGMENT); + } + END(HTML_H1); + PUTS(GOPHER_INDEX_SUBTITLE); + START(HTML_P); + PUTS(GOPHER_SOLICIT_KEYWORDS); + + if (!HTAnchor_title(anAnchor)) + HTAnchor_setTitle(anAnchor, arg); + + FREE_TARGET; + return; +} + +/* De-escape a selector into a command. + * ==================================== + * + * The % hex escapes are converted. Otheriwse, the string is copied. 
+ */ +static void de_escape(char *command, const char *selector) +{ + const char *p = selector; + char *q; + + if (command == NULL) + outofmem(__FILE__, "HTLoadGopher"); + + assert(command != NULL); + + q = command; + while (*p) { /* Decode hex */ + if (*p == HEX_ESCAPE) { + char c; + unsigned int b; + + p++; + c = *p++; + b = UCH(from_hex(c)); + c = *p++; + if (!c) + break; /* Odd number of chars! */ + *q++ = (char) FROMASCII((b << 4) + UCH(from_hex(c))); + } else { + *q++ = *p++; /* Record */ + } + } + *q = '\0'; /* Terminate command */ +} + +/* Free the CSOfields structures. - FM + * =================================== + */ +static void free_CSOfields(void) +{ + CSOfield_info *cur = CSOfields; + CSOfield_info *prev; + + while (cur) { + if (cur->name != cur->name_buf) + FREE(cur->name); + if (cur->attributes != cur->attr_buf) + FREE(cur->attributes); + if (cur->description != cur->desc_buf) + FREE(cur->description); + prev = cur; + cur = cur->next; + FREE(prev); + } + + return; +} + +/* Interpret CSO/PH form template keys. - FM + * ========================================= + */ +static void interpret_cso_key(const char *key, + char *buf, + int *length, + CSOformgen_context * ctx, + HTStream *Target) +{ + CSOfield_info *fld; + + if ((fld = ctx->fld) != 0) { + /* + * Most substitutions only recognized inside of loops. + */ + int error = 0; + + if (0 == StrNCmp(key, "$(FID)", 6)) { + sprintf(buf, "%d", fld->id); + } else if (0 == StrNCmp(key, "$(FDESC)", 8)) { + sprintf(buf, "%.2046s", fld->description); + } else if (0 == StrNCmp(key, "$(FDEF)", 7)) { + strcpy(buf, fld->defreturn ? " checked" : ""); + } else if (0 == StrNCmp(key, "$(FNDX)", 7)) { + strcpy(buf, fld->indexed ? "*" : ""); + } else if (0 == StrNCmp(key, "$(FSIZE)", 8)) { + sprintf(buf, " size=%d maxlength=%d", + fld->max_size > 55 ? 
55 : fld->max_size, + fld->max_size); + } else if (0 == StrNCmp(key, "$(FSIZE2)", 9)) { + sprintf(buf, " maxlength=%d", fld->max_size); + } else { + error = 1; + } + if (!error) { + *length = (int) strlen(buf); + return; + } + } + buf[0] = '\0'; + if (0 == StrNCmp(key, "$(NEXTFLD)", 10)) { + if (!ctx->fld) + fld = CSOfields; + else + fld = ctx->fld->next; + switch (ctx->field_select) { + case 0: + /* + * 'Query' fields, public and lookup attributes. + */ + for (; fld; fld = fld->next) + if (fld->gpublic && (fld->lookup == 1)) + break; + break; + case 1: + /* + * 'Query' fields, accept lookup attribute. + */ + for (; fld; fld = fld->next) + if (fld->lookup == 1) + break; + break; + case 2: + /* + * 'Return' fields, public only. + */ + for (; fld; fld = fld->next) + if (fld->gpublic) + break; + break; + case 3: + /* + * All fields. + */ + break; + } + if (fld) { + ctx->cur_line = ctx->rep_line; + ctx->cur_off = ctx->rep_off; + } + ctx->fld = fld; + + } else if ((0 == StrNCmp(key, "$(QFIELDS)", 10)) || + (0 == StrNCmp(key, "$(RFIELDS)", 10))) { + /* + * Begin iteration sequence. + */ + ctx->rep_line = ctx->cur_line; + ctx->rep_off = ctx->cur_off; + ctx->fld = (CSOfield_info *) 0; + ctx->seek = "$(NEXTFLD)"; + ctx->field_select = (key[2] == 'Q') ? 0 : 2; + if (ctx->public_override) + ctx->field_select++; + + } else if (0 == StrNCmp(key, "$(NAMEFLD)", 10)) { + /* + * Special, locate name field. Flag lookup so QFIELDS will skip it. + */ + for (fld = CSOfields; fld; fld = fld->next) + if (strcmp(fld->name, "name") == 0 || + strcmp(fld->name, "Name") == 0) { + if (fld->lookup) + fld->lookup = 2; + break; + } + ctx->fld = fld; + } else if (0 == StrNCmp(key, "$(HOST)", 7)) { + strcpy(buf, ctx->host); + } else if (0 == StrNCmp(key, "$(PORT)", 7)) { + sprintf(buf, "%d", ctx->port); + } else { + /* + * No match, dump key to buffer so client sees it for debugging. 
+ */ + size_t out = 0; + + while (*key && (*key != ')')) { + buf[out++] = (*key++); + if (out > sizeof(buf) - 2) { + buf[out] = '\0'; + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + out = 0; + } + } + buf[out++] = ')'; + buf[out] = '\0'; + } + *length = (int) strlen(buf); + return; +} + +/* Parse the elements in a CSO/PH fields structure. - FM + * ===================================================== + */ +static int parse_cso_field_info(CSOfield_info *blk) +{ + char *info, *max_spec; + + /* + * Initialize all fields to default values. + */ + blk->indexed = blk->lookup = blk->reserved = blk->max_size = blk->url = 0; + blk->defreturn = blk->explicit_return = blk->gpublic = 0; + + /* + * Search for keywords in info string and set values. Attributes are + * converted to all lower-case for comparison. + */ + info = blk->attributes; + LYLowerCase(info); + if (strstr(info, "indexed ")) + blk->indexed = 1; + if (strstr(info, "default ")) + blk->defreturn = 1; + if (strstr(info, "public ")) + blk->gpublic = 1; + if (strstr(info, "lookup ")) + blk->lookup = 1; + if (strstr(info, "url ")) { + blk->url = 1; + blk->defreturn = 1; + } + max_spec = strstr(info, "max "); + if (max_spec) { + sscanf(&max_spec[4], "%d", &blk->max_size); + } else { + blk->max_size = 32; + } + + return 0; +} + +/* Parse a reply from a CSO/PH fields request. - FM + * ================================================ + */ +static int parse_cso_fields(char *buf, + size_t size) +{ + int ich; + char *p = buf; + int i, code = 0, prev_code; + size_t alen; + char *indx, *name; + CSOfield_info *last, *newf; + + last = CSOfields = (CSOfield_info *) 0; + prev_code = -2555; + buf[0] = '\0'; + + /* + * Start grabbing chars from the network. 
+ */ + while ((ich = NEXT_CHAR) != EOF) { + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTLoadCSO: Interrupted in HTGetCharacter, apparently.\n")); + free_CSOfields(); + buf[0] = '\0'; + return HT_INTERRUPTED; + } + + if ((char) ich != LF) { + *p = (char) ich; /* Put character in buffer */ + if (p < &buf[size - 1]) { + p++; + } + } else { + *p = '\0'; /* Terminate line */ + p = buf; /* Scan it to parse it */ + + /* OK we now have a line in 'p' lets parse it. + */ + + /* + * Break on line that begins with a 2. It's the end of data. + */ + if (*p == '2') + break; + + /* + * Lines beginning with 5 are errors. Print them and quit. + */ + if (*p == '5') { + strcpy(buf, p); + return 5; + } + + if (*p == '-') { + /* + * Data lines look like -200:#: + * where # is the search result number and can be multiple + * digits (infinite?). + */ + + /* + * Check status, ignore any non-success. + */ + if (p[1] != '2') + continue; + + /* + * Parse fields within returned line into status, ndx, name, + * data. + */ + indx = NULL; + name = NULL; + for (i = 0; p[i]; i++) { + if (p[i] == ':') { + p[i] = '\0'; + if (!indx) { + indx = (char *) &p[i + 1]; + code = atoi(indx); + } else if (!name) { + name = (char *) &p[i + 1]; + } else { + i++; + break; + } + } + } + /* + * Add data to field structure. + */ + if (name) { + if (code == prev_code) { + /* + * Remaining data are description. Save in current + * info block. + */ + if (last != NULL) { + alen = strlen((char *) &p[i]) + 1; + if (alen > sizeof(last->desc_buf)) { + if (last->description != last->desc_buf) + FREE(last->description); + if (!(last->description = (char *) malloc(alen))) { + outofmem(__FILE__, "HTLoadCSO"); + } + } + strcpy(last->description, (char *) &p[i]); + } + } else { + /* + * Initialize new block, append to end of list to + * preserve order. 
+ */ + newf = typecalloc(CSOfield_info); + + if (!newf) { + outofmem(__FILE__, "HTLoadCSO"); + } + + assert(newf != NULL); + + if (last) + last->next = newf; + else + CSOfields = newf; + last = newf; + + newf->next = (CSOfield_info *) 0; + newf->name = newf->name_buf; + alen = strlen(name) + 1; + if (alen > sizeof(newf->name_buf)) { + if (!(newf->name = (char *) malloc(alen))) { + outofmem(__FILE__, "HTLoadCSO"); + } + } + strcpy(newf->name, name); + + newf->attributes = newf->attr_buf; + alen = strlen((char *) &p[i]) + 2; + if (alen > sizeof(newf->attr_buf)) { + if (!(newf->attributes = (char *) malloc(alen))) { + outofmem(__FILE__, "HTLoadCSO"); + } + } + strcpy(newf->attributes, (char *) &p[i]); + strcpy((char *) &newf->attributes[alen - 2], " "); + newf->description = newf->desc_buf; + newf->desc_buf[0] = '\0'; + newf->id = atoi(indx); + /* + * Scan for keywords. + */ + parse_cso_field_info(newf); + } + prev_code = code; + } else + break; + } /* end if *p == '-' */ + } /* if end of line */ + + } /* Loop over characters */ + + /* end the text block */ + + if (buf[0] == '\0') { + return -1; /* no response */ + } + buf[0] = '\0'; + return 0; /* all done */ +} /* end of procedure */ + +/* Generate a form for submitting CSO/PH searches. - FM + * ==================================================== + */ +static int generate_cso_form(char *host, + int port, + char *buf, + HTStream *Target) +{ + int i, j, length; + size_t out; + int full_flag = 1; + const char *key; + const char *line; + CSOformgen_context ctx; + static const char *ctemplate[] = + { + "<HTML>\n<HEAD>\n<TITLE>CSO/PH Query Form for $(HOST)</TITLE>\n</HEAD>\n<BODY>", + "<H2><I>CSO/PH Query Form</I> for <EM>$(HOST)</EM></H2>", + "To search the database for a name, fill in one or more of the fields", + "in the form below and activate the 'Submit query' button. 
At least", + "one of the entered fields must be flagged as indexed.", + "<HR><FORM method=\"POST\" action=\"cso://$(HOST)/\">", + "[ <input type=\"submit\" value=\"Submit query\"> | ", + "<input type=\"reset\" value=\"Clear fields\"> ]", + "<P><DL>", + " <DT>Search parameters (* indicates indexed field):", + " <DD>", + "$(NAMEFLD) <DL COMPACT>\n <DT><I>$(FDESC)</I>$(FNDX)", + " <DD>Last: <input name=\"q_$(FID)\" type=\"text\" size=49$(FSIZE2)>", + " <DD>First: <input name=\"q_$(FID)\" type=\"text\" size=48$(FSIZE2)>", + "$(QFIELDS) <DT><I>$(FDESC)</I>$(FNDX)", + " <DD><input name=\"q_$(FID)\" type=\"text\" $(FSIZE)>\n$(NEXTFLD)", + " </DL>", + " </DL>\n<P><DL>", + " <DT>Output format:", + " <DD>Returned data option: <select name=\"return\">", + " <option>default<option selected>all<option>selected</select><BR>", + "$(RFIELDS) <input type=\"checkbox\" name=\"r_$(FID)\"$(FDEF)> $(FDESC)<BR>", + "$(NEXTFLD) ", + " </DL></FORM><HR>\n</BODY>\n</HTML>", + (char *) 0 + }; + + memset(&ctx, 0, sizeof(ctx)); + ctx.host = host; + ctx.seek = (char *) 0; + ctx.port = port; + ctx.fld = (CSOfield_info *) 0; + ctx.public_override = full_flag; + /* + * Parse the strings in the template array to produce HTML document to send + * to client. First line is skipped for 'full' lists. + */ + out = 0; + buf[out] = '\0'; + for (i = full_flag ? /***1***/ 0 : 0; + ctemplate[i]; + i++) { + /* + * Search the current string for substitution, flagged by $( + */ + for (line = ctemplate[i], j = 0; line[j]; j++) { + if ((line[j] == '$') && (line[j + 1] == '(')) { + /* + * Command detected, flush output buffer and find closing ')' + * that delimits the command. + */ + buf[out] = '\0'; + if (out > 0) + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + for (key = &line[j]; line[j + 1] && (line[j] != ')'); j++) { + ; + } + /* + * Save context, interpet command and restore updated context. 
+ */ + ctx.cur_line = i; + ctx.cur_off = j; + interpret_cso_key(key, buf, &length, &ctx, Target); + i = ctx.cur_line; + j = ctx.cur_off; + line = ctemplate[i]; + out = (size_t) length; + + if (ctx.seek) { + /* + * Command wants us to skip (forward) to indicated token. + * Start at current position. + */ + size_t slen = strlen(ctx.seek); + + for (; ctemplate[i]; i++) { + for (line = ctemplate[i]; line[j]; j++) { + if (line[j] == '$') + if (0 == StrNCmp(ctx.seek, &line[j], slen)) { + if (j == 0) + j = (int) strlen(ctemplate[--i]) - 1; + else + --j; + line = ctemplate[i]; + ctx.seek = (char *) 0; + break; + } + } + if (!ctx.seek) + break; + j = 0; + } + if (ctx.seek) { + char *temp = 0; + + HTSprintf0(&temp, GOPHER_CSO_SEEK_FAILED, ctx.seek); + (*Target->isa->put_block) (Target, temp, (int) strlen(temp)); + FREE(temp); + } + } + } else { + /* + * Non-command text, add to output buffer. + */ + buf[out++] = line[j]; + if (out > (sizeof(buf) - 3)) { + buf[out] = '\0'; + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + out = 0; + } + } + } + buf[out++] = '\n'; + buf[out] = '\0'; + } + if (out > 0) + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + + return 0; +} + +/* Generate a results report for CSO/PH form-based searches. - FM + * ============================================================== + */ +static int generate_cso_report(HTStream *Target) +{ + int ich; + char line[BIG]; + char *buf = 0; + char *p = line, *href = NULL; + int len, i, prev_ndx, ndx; + char *rcode, *ndx_str, *fname, *fvalue, *l; + CSOfield_info *fld; + BOOL stop = FALSE; + + /* + * Read lines until non-negative status. + */ + prev_ndx = -100; + /* + * Start grabbing chars from the network. 
+ */ + while (!stop && (ich = NEXT_CHAR) != EOF) { + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTLoadCSO: Interrupted in HTGetCharacter, apparently.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + goto end_CSOreport; + } + + if ((char) ich != LF) { + *p = (char) ich; /* Put character in line */ + if (p < &line[BIG - 1]) { + p++; + } + } else { + *p = '\0'; /* Terminate line */ + /* + * OK we now have a line. Load it as 'p' and parse it. + */ + p = line; + if (p[0] != '-' && p[0] != '1') { + stop = TRUE; + } + rcode = (p[0] == '-') ? &p[1] : p; + ndx_str = fname = NULL; + len = (int) strlen(p); + for (i = 0; i < len; i++) { + if (p[i] == ':') { + p[i] = '\0'; + if (!ndx_str) { + fname = ndx_str = &p[i + 1]; + } else { + fname = &p[i + 1]; + break; + } + } + } + if (ndx_str) { + ndx = atoi(ndx_str); + if (prev_ndx != ndx) { + if (prev_ndx != -100) { + HTSprintf0(&buf, "</DL></DL>\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + if (ndx == 0) { + HTSprintf0(&buf, + "<HR><DL><DT>Information/status<DD><DL><DT>\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } else { + HTSprintf0(&buf, + "<HR><DL><DT>Entry %d:<DD><DL COMPACT><DT>\n", ndx); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + prev_ndx = ndx; + } + } else { + HTSprintf0(&buf, "<DD>%s\n", rcode); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + continue; + } + if ((*rcode >= '2') && (*rcode <= '5') && (fname != ndx_str)) { + while (*fname == ' ') { + fname++; /* trim leading spaces */ + } + for (fvalue = fname; *fvalue; fvalue++) { + if (*fvalue == ':') { + *fvalue++ = '\0'; + i = (int) strlen(fname) - 1; + while (i >= 0 && fname[i] == ' ') { + fname[i--] = '\0'; /* trim trailing */ + } + break; + } + } + if (fvalue) { + while (*fvalue == ' ') { + fvalue++; /* trim leading spaces */ + } + } + if (*fname) { + for (fld = CSOfields; fld; fld = fld->next) { + if (!strcmp(fld->name, fname)) { + if (fld->description) { + fname 
= fld->description; + } + break; + } + } + if (fld && fld->url) { + HTSprintf0(&buf, + "<DT><I>%s</I><DD><A HREF=\"%s\">%s</A>\n", + fname, fvalue, fvalue); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } else { + HTSprintf0(&buf, "<DT><I>%s</I><DD>", fname); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + buf[0] = '\0'; + l = fvalue; + while (*l) { + if (*l == '<') { + StrAllocCat(buf, "<"); + l++; + } else if (*l == '>') { + StrAllocCat(buf, ">"); + l++; + } else if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) && + StrNCmp(l, "snews://", 8) && + StrNCmp(l, "nntp://", 7) && + StrNCmp(l, "snewspost:", 10) && + StrNCmp(l, "snewsreply:", 11) && + StrNCmp(l, "newspost:", 9) && + StrNCmp(l, "newsreply:", 10) && + StrNCmp(l, "ftp://", 6) && + StrNCmp(l, "file:/", 6) && + StrNCmp(l, "finger://", 9) && + StrNCmp(l, "http://", 7) && + StrNCmp(l, "https://", 8) && + StrNCmp(l, "wais://", 7) && + StrNCmp(l, STR_MAILTO_URL, + LEN_MAILTO_URL) && + StrNCmp(l, "cso://", 6) && + StrNCmp(l, "gopher://", 9)) { + HTSprintf(&buf, "%c", *l++); + } else { + StrAllocCat(buf, "<a href=\""); + StrAllocCopy(href, l); + StrAllocCat(buf, strtok(href, " \r\n\t,>)\"")); + StrAllocCat(buf, "\">"); + while (*l && !strchr(" \r\n\t,>)\"", *l)) { + HTSprintf(&buf, "%c", *l++); + } + StrAllocCat(buf, "</a>"); + FREE(href); + } + } + StrAllocCat(buf, "\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + } else { + HTSprintf0(&buf, "<DD>"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + buf[0] = '\0'; + l = fvalue; + while (*l) { + if (*l == '<') { + StrAllocCat(buf, "<"); + l++; + } else if (*l == '>') { + StrAllocCat(buf, ">"); + l++; + } else if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) && + StrNCmp(l, "snews://", 8) && + StrNCmp(l, "nntp://", 7) && + StrNCmp(l, "snewspost:", 10) && + StrNCmp(l, "snewsreply:", 11) && + StrNCmp(l, "newspost:", 9) && + StrNCmp(l, "newsreply:", 10) && + StrNCmp(l, "ftp://", 6) && + StrNCmp(l, "file:/", 
6) && + StrNCmp(l, "finger://", 9) && + StrNCmp(l, "http://", 7) && + StrNCmp(l, "https://", 8) && + StrNCmp(l, "wais://", 7) && + StrNCmp(l, STR_MAILTO_URL, LEN_MAILTO_URL) && + StrNCmp(l, "cso://", 6) && + StrNCmp(l, "gopher://", 9)) { + HTSprintf(&buf, "%c", *l++); + } else { + StrAllocCat(buf, "<a href=\""); + StrAllocCopy(href, l); + StrAllocCat(buf, strtok(href, " \r\n\t,>)\"")); + StrAllocCat(buf, "\">"); + while (*l && !strchr(" \r\n\t,>)\"", *l)) { + HTSprintf(&buf, "%c", *l++); + } + StrAllocCat(buf, "</a>"); + FREE(href); + } + } + StrAllocCat(buf, "\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + } else { + HTSprintf0(&buf, "<DD>%s\n", fname ? fname : rcode); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + } + } + end_CSOreport: + if (prev_ndx != -100) { + HTSprintf0(&buf, "</DL></DL>\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + } + FREE(buf); + return 0; +} + +/* CSO/PH form-based search gateway - FM HTLoadCSO + * ===================================== + */ +static int HTLoadCSO(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +{ + static const char end_form[] = "</BODY>\n</HTML>\n"; + char *host, *cp, *data; + int port = CSO_PORT; + int status; /* tcp return */ + bstring *command = NULL; + bstring *content = NULL; + int len, i, j, start, finish, flen, ndx; + int return_type, has_indexed; + CSOfield_info *fld; + char buf[2048]; + HTFormat format_in = WWW_HTML; + HTStream *Target = NULL; + + if (!acceptable_inited) + init_acceptable(); + + if (!arg) + return -3; /* Bad if no name sepcified */ + if (!*arg) + return -2; /* Bad if name had zero length */ + CTRACE((tfp, "HTLoadCSO: Looking for %s\n", arg)); + + /* + * Set up a socket to the server for the data. + */ + status = HTDoConnect(arg, "cso", CSO_PORT, &s); + if (status == HT_INTERRUPTED) { + /* + * Interrupt cleanly. 
+ */ + CTRACE((tfp, + "HTLoadCSO: Interrupted on connect; recovering cleanly.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + return HT_NOT_LOADED; + } + if (status < 0) { + CTRACE((tfp, "HTLoadCSO: Unable to connect to remote host for `%s'.\n", + arg)); + return HTInetStatus("connect"); + } + + HTInitInput(s); /* Set up input buffering */ + + HTBprintf(&command, "fields%c%c", CR, LF); + if (TRACE) { + CTRACE((tfp, "HTLoadCSO: Connected, writing command `")); + trace_bstring(command); + CTRACE((tfp, "' to socket %d\n", s)); + } + _HTProgress(GOPHER_SENDING_CSO_REQUEST); + status = (int) NETWRITE(s, BStrData(command), BStrLen(command)); + BStrFree(command); + if (status < 0) { + CTRACE((tfp, "HTLoadCSO: Unable to send command.\n")); + return HTInetStatus("send"); + } + _HTProgress(GOPHER_SENT_CSO_REQUEST); + + /* + * Now read the data from the socket. + */ + status = parse_cso_fields(buf, sizeof(buf)); + if (status) { + NETCLOSE(s); + if (status == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + } else if (buf[0] != '\0') { + HTAlert(buf); + } else { + HTAlert(FAILED_NO_RESPONSE); + } + return HT_NOT_LOADED; + } + Target = HTStreamStack(format_in, + format_out, + sink, anAnchor); + if (!Target || Target == NULL) { + char *temp = 0; + + HTSprintf0(&temp, CANNOT_CONVERT_I_TO_O, + HTAtom_name(format_in), HTAtom_name(format_out)); + HTAlert(temp); + FREE(temp); + NETCLOSE(s); + return HT_NOT_LOADED; + } + host = HTParse(arg, "", PARSE_HOST); + if ((cp = HTParsePort(host, &port)) != NULL) { + if (port == CSO_PORT) { + *cp = '\0'; + } + } + anAnchor->safe = TRUE; + if (isBEmpty(anAnchor->post_data)) { + generate_cso_form(host, port, buf, Target); + (*Target->isa->_free) (Target); + FREE(host); + NETCLOSE(s); + free_CSOfields(); + return HT_LOADED; + } + + HTBprintf(&command, + "<HTML>\n<HEAD>\n<TITLE>CSO/PH Results on %s</TITLE>\n</HEAD>\n<BODY>\n", + host); + (*Target->isa->put_block) (Target, BStrData(command), BStrLen(command)); + BStrFree(command); + 
FREE(host); + + BStrCopy(content, anAnchor->post_data); + assert(content != NULL); + + if (BStrData(content)[BStrLen(content) - 1] != '&') + BStrCat0(content, "&"); + + data = BStrData(content); + len = BStrLen(content); + for (i = 0; i < len; i++) { + if (data[i] == '+') { + data[i] = ' '; + } + } + + data = BStrData(content); + HTUnEscape(data); /* FIXME: could it have embedded null? */ + len = BStrLen(content); + + return_type = 0; + has_indexed = 0; + start = 0; + for (i = 0; i < len; i++) { + if (!data[i] || data[i] == '&') { + /* + * Value parsed. Unescape characters and look for first '=' to + * delimit field name from value. + */ + flen = i - start; + finish = start + flen; + data[finish] = '\0'; + for (j = start; j < finish; j++) { + if (data[j] == '=') { + /* + * data[start..j-1] is field name, + * [j+1..finish-1] is value. + */ + if ((data[start + 1] == '_') && + ((data[start] == 'r') || (data[start] == 'q'))) { + /* + * Decode fields number and lookup field info. + */ + sscanf(&data[start + 2], "%d=", &ndx); + for (fld = CSOfields; fld; fld = fld->next) { + if (ndx == fld->id) { + if ((j + 1) >= finish) + break; /* ignore nulls */ + if (data[start] == 'q') { + /* + * Append field to query line. 
+ */ + if (fld->lookup) { + if (fld->indexed) + has_indexed = 1; + if (isBEmpty(command)) { + BStrCopy0(command, "query "); + } else { + BStrCat0(command, " "); + } + HTBprintf(&command, "%s=\"%s\"", + fld->name, &data[j + 1]); + } else { + strcpy(buf, + "Warning: non-lookup field ignored<BR>\n"); + (*Target->isa->put_block) (Target, + buf, + (int) + strlen(buf)); + } + } else if (data[start] == 'r') { + fld->explicit_return = 1; + } + break; + } + } + } else if (!StrNCmp(&data[start], "return=", 7)) { + if (!strcmp(&data[start + 7], "all")) { + return_type = 1; + } else if (!strcmp(&data[start + 7], "selected")) { + return_type = 2; + } + } + } + } + start = i + 1; + } + } + BStrFree(content); + if (isBEmpty(command) || !has_indexed) { + NETCLOSE(s); + strcpy(buf, + "<EM>Error:</EM> At least one indexed field value must be specified!\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + strcpy(buf, "</BODY>\n</HTML>\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + (*Target->isa->_free) (Target); + free_CSOfields(); + return HT_LOADED; + } + /* + * Append return fields. 
+ */ + if (return_type == 1) { + BStrCat0(command, " return all"); + } else if (return_type == 2) { + BStrCat0(command, " return"); + for (fld = CSOfields; fld; fld = fld->next) { + if (fld->explicit_return) { + HTBprintf(&command, " %s", fld->name); + } + } + } + HTBprintf(&command, "%c%c", CR, LF); + strcpy(buf, "<H2>\n<EM>CSO/PH command:</EM> "); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + (*Target->isa->put_block) (Target, BStrData(command), BStrLen(command)); + strcpy(buf, "</H2>\n"); + (*Target->isa->put_block) (Target, buf, (int) strlen(buf)); + if (TRACE) { + CTRACE((tfp, "HTLoadCSO: Writing command `")); + trace_bstring(command); + CTRACE((tfp, "' to socket %d\n", s)); + } + status = (int) NETWRITE(s, BStrData(command), BStrLen(command)); + BStrFree(command); + if (status < 0) { + CTRACE((tfp, "HTLoadCSO: Unable to send command.\n")); + free_CSOfields(); + return HTInetStatus("send"); + } + generate_cso_report(Target); + NETCLOSE(s); + (*Target->isa->put_block) (Target, end_form, (int) sizeof(end_form) - 1); + (*Target->isa->_free) (Target); + FREE(host); + free_CSOfields(); + return HT_LOADED; +} + +/* Load by name. HTLoadGopher + * ============= + * + * Bug: No decoding of strange data types as yet. + * + */ +static int HTLoadGopher(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +{ + char *command; /* The whole command */ + int status; /* tcp return */ + char gtype; /* Gopher Node type */ + char *selector; /* Selector string */ + + if (!acceptable_inited) + init_acceptable(); + + if (!arg) + return -3; /* Bad if no name sepcified */ + if (!*arg) + return -2; /* Bad if name had zero length */ + CTRACE((tfp, "HTGopher: Looking for %s\n", arg)); + + /* + * If it's a port 105 GOPHER_CSO gtype with no ISINDEX token ('?'), use the + * form-based CSO gateway (otherwise, return an ISINDEX cover page or do + * the ISINDEX search). 
- FM + */ + { + size_t len; + + if ((len = strlen(arg)) > 5) { + if (0 == strcmp((const char *) &arg[len - 6], ":105/2")) { + /* Use CSO gateway. */ + CTRACE((tfp, "HTGopher: Passing to CSO/PH gateway.\n")); + return HTLoadCSO(arg, anAnchor, format_out, sink); + } + } + } + + /* + * If it's a port 79/0[/...] URL, use the finger gateway. - FM + */ + if (strstr(arg, ":79/0") != NULL) { +#ifndef DISABLE_FINGER + CTRACE((tfp, "HTGopher: Passing to finger gateway.\n")); + return HTLoadFinger(arg, anAnchor, format_out, sink); +#else /* finger is disabled */ + HTAlert(COULD_NOT_ACCESS_DOCUMENT); + return HT_NOT_LOADED; +#endif /* DISABLE_FINGER */ + } + + /* + * Get entity type, and selector string. + */ + { + char *p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); + + gtype = '1'; /* Default = menu */ + selector = p1; + if ((*selector++ == '/') && (*selector)) { /* Skip first slash */ + gtype = *selector++; /* Pick up gtype */ + } + if (gtype == GOPHER_INDEX) { + char *query; + + /* + * Search is allowed. + */ + HTAnchor_setIndex(anAnchor, anAnchor->address); + query = strchr(selector, '?'); /* Look for search string */ + if (!query || !query[1]) { /* No search required */ + target = HTML_new(anAnchor, format_out, sink); + targetClass = *target->isa; + display_index(arg, anAnchor); /* Display "cover page" */ + return HT_LOADED; /* Local function only */ + } + *query++ = '\0'; /* Skip '?' */ + command = + (char *) malloc(strlen(selector) + 1 + strlen(query) + 2 + 1); + if (command == NULL) + outofmem(__FILE__, "HTLoadGopher"); + + assert(command != NULL); + + de_escape(command, selector); /* Bug fix TBL 921208 */ + + strcat(command, "\t"); + + { /* Remove plus signs 921006 */ + char *p; + + for (p = query; *p; p++) { + if (*p == '+') + *p = ' '; + } + } + + de_escape(&command[strlen(command)], query); /* bug fix LJM 940415 */ + } else if (gtype == GOPHER_CSO) { + char *query; + + /* + * Search is allowed. 
+ */ + query = strchr(selector, '?'); /* Look for search string */ + if (!query || !query[1]) { /* No search required */ + target = HTML_new(anAnchor, format_out, sink); + targetClass = *target->isa; + display_cso(arg, anAnchor); /* Display "cover page" */ + return HT_LOADED; /* Local function only */ + } + HTAnchor_setIndex(anAnchor, anAnchor->address); + *query++ = '\0'; /* Skip '?' */ + command = (char *) malloc(strlen("query") + 1 + + strlen(query) + 2 + 1); + if (command == NULL) + outofmem(__FILE__, "HTLoadGopher"); + + assert(command != NULL); + + de_escape(command, selector); /* Bug fix TBL 921208 */ + + strcpy(command, "query "); + + { /* Remove plus signs 921006 */ + char *p; + + for (p = query; *p; p++) { + if (*p == '+') + *p = ' '; + } + } + de_escape(&command[strlen(command)], query); /* bug fix LJM 940415 */ + + } else { /* Not index */ + command = (char *) malloc(strlen(selector) + 2 + 1); + if (command == NULL) + outofmem(__FILE__, "HTLoadGopher"); + + assert(command != NULL); + + de_escape(command, selector); + } + FREE(p1); + } + + { + char *p = command + strlen(command); + + *p++ = CR; /* Macros to be correct on Mac */ + *p++ = LF; + *p = '\0'; + } + + /* + * Set up a socket to the server for the data. + */ + status = HTDoConnect(arg, "gopher", GOPHER_PORT, &s); + if (status == HT_INTERRUPTED) { + /* + * Interrupt cleanly. 
+ */ + CTRACE((tfp, "HTGopher: Interrupted on connect; recovering cleanly.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + FREE(command); + return HT_NOT_LOADED; + } + if (status < 0) { + CTRACE((tfp, "HTGopher: Unable to connect to remote host for `%s'.\n", + arg)); + FREE(command); + return HTInetStatus("connect"); + } + + HTInitInput(s); /* Set up input buffering */ + + CTRACE((tfp, "HTGopher: Connected, writing command `%s' to socket %d\n", + command, s)); + +#ifdef NOT_ASCII + { + char *p; + + for (p = command; *p; p++) { + *p = TOASCII(*p); + } + } +#endif + + _HTProgress(GOPHER_SENDING_REQUEST); + + status = (int) NETWRITE(s, command, (int) strlen(command)); + FREE(command); + if (status < 0) { + CTRACE((tfp, "HTGopher: Unable to send command.\n")); + return HTInetStatus("send"); + } + + _HTProgress(GOPHER_SENT_REQUEST); + + /* + * Now read the data from the socket. + */ + switch (gtype) { + + case GOPHER_TEXT: + HTParseSocket(WWW_PLAINTEXT, format_out, anAnchor, s, sink); + break; + + case GOPHER_HTML: + case GOPHER_CHTML: + HTParseSocket(WWW_HTML, format_out, anAnchor, s, sink); + break; + + case GOPHER_GIF: + case GOPHER_IMAGE: + case GOPHER_PLUS_IMAGE: + HTParseSocket(HTAtom_for("image/gif"), + format_out, anAnchor, s, sink); + break; + + case GOPHER_MENU: + case GOPHER_INDEX: + target = HTML_new(anAnchor, format_out, sink); + targetClass = *target->isa; + parse_menu(arg, anAnchor); + break; + + case GOPHER_CSO: + target = HTML_new(anAnchor, format_out, sink); + targetClass = *target->isa; + parse_cso(arg, anAnchor); + break; + + case GOPHER_SOUND: + case GOPHER_PLUS_SOUND: + HTParseSocket(WWW_AUDIO, format_out, anAnchor, s, sink); + break; + + case GOPHER_PLUS_MOVIE: + HTParseSocket(HTAtom_for("video/mpeg"), format_out, anAnchor, s, sink); + break; + + case GOPHER_PLUS_PDF: + HTParseSocket(HTAtom_for("application/pdf"), format_out, anAnchor, + s, sink); + break; + + case GOPHER_MACBINHEX: + case GOPHER_PCBINARY: + case GOPHER_UUENCODED: + case 
GOPHER_BINARY: + default: + /* + * Specifying WWW_UNKNOWN forces dump to local disk. + */ + HTParseSocket(WWW_UNKNOWN, format_out, anAnchor, s, sink); + break; + + } /* switch(gtype) */ + + NETCLOSE(s); + return HT_LOADED; +} + +#ifdef GLOBALDEF_IS_MACRO +#define _HTGOPHER_C_1_INIT { "gopher", HTLoadGopher, NULL } +GLOBALDEF(HTProtocol, HTGopher, _HTGOPHER_C_1_INIT); +#define _HTCSO_C_1_INIT { "cso", HTLoadCSO, NULL } +GLOBALDEF(HTProtocol, HTCSO, _HTCSO_C_1_INIT); +#else +GLOBALDEF HTProtocol HTGopher = +{"gopher", HTLoadGopher, NULL}; +GLOBALDEF HTProtocol HTCSO = +{"cso", HTLoadCSO, NULL}; +#endif /* GLOBALDEF_IS_MACRO */ + +#endif /* not DISABLE_GOPHER */ diff --git a/WWW/Library/Implementation/HTGopher.h b/WWW/Library/Implementation/HTGopher.h new file mode 100644 index 00000000..667fc24e --- /dev/null +++ b/WWW/Library/Implementation/HTGopher.h @@ -0,0 +1,29 @@ +/* Gopher protocol module for libwww + GOPHER ACCESS + + HISTORY: + + 8 Jan 92 Adapted from HTTP TBL + + */ + +#ifndef HTGOPHER_H +#define HTGOPHER_H + +#include <HTAccess.h> +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTGopher); + +#else + GLOBALREF HTProtocol HTGopher; +#endif /* GLOBALREF_IS_MACRO */ + +#ifdef __cplusplus +} +#endif +#endif /* HTGOPHER_H */ diff --git a/WWW/Library/Implementation/HTGroup.c b/WWW/Library/Implementation/HTGroup.c new file mode 100644 index 00000000..34c8b32e --- /dev/null +++ b/WWW/Library/Implementation/HTGroup.c @@ -0,0 +1,766 @@ +/* MODULE HTGroup.c + * GROUP FILE ROUTINES + * + * Contains group file parser and routines to match IP + * address templates and to find out group membership. 
+ * + * + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * + * HISTORY: + * + * + * BUGS: + * + * + * + * GROUP DEFINITION GRAMMAR: + * + * string = "sequence of alphanumeric characters" + * user_name ::= string + * group_name ::= string + * group_ref ::= group_name + * user_def ::= user_name | group_ref + * user_def_list ::= user_def { ',' user_def } + * user_part = user_def | '(' user_def_list ')' + * + * templ = "sequence of alphanumeric characters and '*'s" + * ip_number_mask ::= templ '.' templ '.' templ '.' templ + * domain_name_mask ::= templ { '.' templ } + * address ::= ip_number_mask | domain_name_mask + * address_def ::= address + * address_def_list ::= address_def { ',' address_def } + * address_part = address_def | '(' address_def_list ')' + * + * item ::= [user_part] ['@' address_part] + * item_list ::= item { ',' item } + * group_def ::= item_list + * group_decl ::= group_name ':' group_def + * + */ + +#include <HTUtils.h> + +#include <HTAAUtil.h> +#include <HTLex.h> /* Lexical analysor */ +#include <HTGroup.h> /* Implemented here */ + +#include <LYUtils.h> +#include <LYLeaks.h> + +/* + * Group file parser + */ + +typedef HTList UserDefList; +typedef HTList AddressDefList; + +typedef struct { + UserDefList *user_def_list; + AddressDefList *address_def_list; +} Item; + +typedef struct { + char *name; + GroupDef *translation; +} Ref; + +static void syntax_error(FILE *fp, const char *msg, + LexItem lex_item) +{ + char buffer[41]; + int cnt = 0; + int ch; + + while ((ch = getc(fp)) != EOF && ch != '\n') + if (cnt < 40) + buffer[cnt++] = (char) ch; + buffer[cnt] = (char) 0; + + CTRACE((tfp, "%s %d before: '%s'\nHTGroup.c: %s (got %s)\n", + "HTGroup.c: Syntax error in rule file at line", + HTlex_line, buffer, msg, lex_verbose(lex_item))); + HTlex_line++; +} + +static AddressDefList *parse_address_part(FILE *fp) +{ + AddressDefList *address_def_list = NULL; + LexItem lex_item; + BOOL only_one = NO; + + lex_item = lex(fp); + if (lex_item == 
LEX_ALPH_STR || lex_item == LEX_TMPL_STR) + only_one = YES; + else if (lex_item != LEX_OPEN_PAREN || + ((lex_item = lex(fp)) != LEX_ALPH_STR && + lex_item != LEX_TMPL_STR)) { + syntax_error(fp, "Expecting a single address or '(' beginning list", + lex_item); + return NULL; + } + address_def_list = HTList_new(); + + for (;;) { + Ref *ref = typecalloc(Ref); + + if (ref == NULL) + outofmem(__FILE__, "parse_address_part"); + + assert(ref != NULL); + + ref->name = NULL; + ref->translation = NULL; + StrAllocCopy(ref->name, HTlex_buffer); + + HTList_addObject(address_def_list, (void *) ref); + + if (only_one || (lex_item = lex(fp)) != LEX_ITEM_SEP) + break; + /* + * Here lex_item == LEX_ITEM_SEP; after item separator it + * is ok to have one or more newlines (LEX_REC_SEP) and + * they are ignored (continuation line). + */ + do { + lex_item = lex(fp); + } while (lex_item == LEX_REC_SEP); + + if (lex_item != LEX_ALPH_STR && lex_item != LEX_TMPL_STR) { + syntax_error(fp, "Expecting an address template", lex_item); + HTList_delete(address_def_list); + address_def_list = NULL; + return NULL; + } + } + + if (!only_one && lex_item != LEX_CLOSE_PAREN) { + HTList_delete(address_def_list); + address_def_list = NULL; + syntax_error(fp, "Expecting ')' closing address list", lex_item); + return NULL; + } + return address_def_list; +} + +static UserDefList *parse_user_part(FILE *fp) +{ + UserDefList *user_def_list = NULL; + LexItem lex_item; + BOOL only_one = NO; + + lex_item = lex(fp); + if (lex_item == LEX_ALPH_STR) + only_one = YES; + else if (lex_item != LEX_OPEN_PAREN || + (lex_item = lex(fp)) != LEX_ALPH_STR) { + syntax_error(fp, "Expecting a single name or '(' beginning list", + lex_item); + return NULL; + } + user_def_list = HTList_new(); + + for (;;) { + Ref *ref = typecalloc(Ref); + + if (ref == NULL) + outofmem(__FILE__, "parse_user_part"); + + assert(ref != NULL); + + ref->name = NULL; + ref->translation = NULL; + StrAllocCopy(ref->name, HTlex_buffer); + + 
HTList_addObject(user_def_list, (void *) ref); + + if (only_one || (lex_item = lex(fp)) != LEX_ITEM_SEP) + break; + /* + * Here lex_item == LEX_ITEM_SEP; after item separator it + * is ok to have one or more newlines (LEX_REC_SEP) and + * they are ignored (continuation line). + */ + do { + lex_item = lex(fp); + } while (lex_item == LEX_REC_SEP); + + if (lex_item != LEX_ALPH_STR) { + syntax_error(fp, "Expecting user or group name", lex_item); + HTList_delete(user_def_list); + user_def_list = NULL; + return NULL; + } + } + + if (!only_one && lex_item != LEX_CLOSE_PAREN) { + HTList_delete(user_def_list); + user_def_list = NULL; + syntax_error(fp, "Expecting ')' closing user/group list", lex_item); + return NULL; + } + return user_def_list; +} + +static Item *parse_item(FILE *fp) +{ + Item *item = NULL; + UserDefList *user_def_list = NULL; + AddressDefList *address_def_list = NULL; + LexItem lex_item; + + lex_item = lex(fp); + if (lex_item == LEX_ALPH_STR || lex_item == LEX_OPEN_PAREN) { + unlex(lex_item); + user_def_list = parse_user_part(fp); + lex_item = lex(fp); + } + + if (lex_item == LEX_AT_SIGN) { + lex_item = lex(fp); + if (lex_item == LEX_ALPH_STR || lex_item == LEX_TMPL_STR || + lex_item == LEX_OPEN_PAREN) { + unlex(lex_item); + address_def_list = parse_address_part(fp); + } else { + if (user_def_list) { + HTList_delete(user_def_list); /* @@@@ */ + user_def_list = NULL; + } + syntax_error(fp, "Expected address part (single address or list)", + lex_item); + return NULL; + } + } else + unlex(lex_item); + + if (!user_def_list && !address_def_list) { + syntax_error(fp, "Empty item not allowed", lex_item); + return NULL; + } + item = typecalloc(Item); + if (item == NULL) + outofmem(__FILE__, "parse_item"); + + assert(item != NULL); + + item->user_def_list = user_def_list; + item->address_def_list = address_def_list; + return item; +} + +static ItemList *parse_item_list(FILE *fp) +{ + ItemList *item_list = HTList_new(); + Item *item; + LexItem lex_item; + + for 
(;;) { + if (!(item = parse_item(fp))) { + HTList_delete(item_list); /* @@@@ */ + item_list = NULL; + return NULL; + } + HTList_addObject(item_list, (void *) item); + lex_item = lex(fp); + if (lex_item != LEX_ITEM_SEP) { + unlex(lex_item); + return item_list; + } + /* + * Here lex_item == LEX_ITEM_SEP; after item separator it + * is ok to have one or more newlines (LEX_REC_SEP) and + * they are ignored (continuation line). + */ + do { + lex_item = lex(fp); + } while (lex_item == LEX_REC_SEP); + unlex(lex_item); + } +} + +GroupDef *HTAA_parseGroupDef(FILE *fp) +{ + ItemList *item_list = NULL; + GroupDef *group_def = NULL; + LexItem lex_item; + + if (!(item_list = parse_item_list(fp))) { + return NULL; + } + group_def = typecalloc(GroupDef); + if (group_def == NULL) + outofmem(__FILE__, "HTAA_parseGroupDef"); + + assert(group_def != NULL); + + group_def->group_name = NULL; + group_def->item_list = item_list; + + if ((lex_item = lex(fp)) != LEX_REC_SEP) { + syntax_error(fp, "Garbage after group definition", lex_item); + } + + return group_def; +} + +#if 0 +static GroupDef *parse_group_decl(FILE *fp) +{ + char *group_name = NULL; + GroupDef *group_def = NULL; + LexItem lex_item; + + do { + lex_item = lex(fp); + } while (lex_item == LEX_REC_SEP); /* Ignore empty lines */ + + if (lex_item != LEX_ALPH_STR) { + if (lex_item != LEX_EOF) + syntax_error(fp, "Expecting group name", lex_item); + return NULL; + } + StrAllocCopy(group_name, HTlex_buffer); + + if (LEX_FIELD_SEP != (lex_item = lex(fp))) { + syntax_error(fp, "Expecting field separator", lex_item); + FREE(group_name); + return NULL; + } + + if (!(group_def = HTAA_parseGroupDef(fp))) { + FREE(group_name); + return NULL; + } + group_def->group_name = group_name; + + return group_def; +} + +/* + * Group manipulation routines + */ + +static GroupDef *find_group_def(GroupDefList *group_list, + const char *group_name) +{ + if (group_list && group_name) { + GroupDefList *cur = group_list; + GroupDef *group_def; + + while 
(NULL != (group_def = (GroupDef *) HTList_nextObject(cur))) { + if (!strcmp(group_name, group_def->group_name)) { + return group_def; + } + } + } + return NULL; +} + +void HTAA_resolveGroupReferences(GroupDef *group_def, + GroupDefList *group_def_list) +{ + if (group_def && group_def->item_list && group_def_list) { + ItemList *cur1 = group_def->item_list; + Item *item; + + while (NULL != (item = (Item *) HTList_nextObject(cur1))) { + UserDefList *cur2 = item->user_def_list; + Ref *ref; + + while (NULL != (ref = (Ref *) HTList_nextObject(cur2))) + ref->translation = find_group_def(group_def_list, ref->name); + + /* Does NOT translate address_def_list */ + } + } +} + +static void add_group_def(GroupDefList *group_def_list, + GroupDef *group_def) +{ + HTAA_resolveGroupReferences(group_def, group_def_list); + HTList_addObject(group_def_list, (void *) group_def); +} + +static GroupDefList *parse_group_file(FILE *fp) +{ + GroupDefList *group_def_list = HTList_new(); + GroupDef *group_def; + + while (NULL != (group_def = parse_group_decl(fp))) + add_group_def(group_def_list, group_def); + + return group_def_list; +} +#endif + +/* + * Trace functions + */ + +static void print_item(Item *item) +{ + if (!item) + fprintf(tfp, "\tNULL-ITEM\n"); + else { + UserDefList *cur1 = item->user_def_list; + AddressDefList *cur2 = item->address_def_list; + Ref *user_ref = (Ref *) HTList_nextObject(cur1); + Ref *addr_ref = (Ref *) HTList_nextObject(cur2); + + if (user_ref) { + fprintf(tfp, "\t[%s%s", user_ref->name, + (user_ref->translation ? "*REF*" : "")); + while (NULL != (user_ref = (Ref *) HTList_nextObject(cur1))) + fprintf(tfp, "; %s%s", user_ref->name, + (user_ref->translation ? 
"*REF*" : "")); + fprintf(tfp, "] "); + } else + fprintf(tfp, "\tANYBODY "); + + if (addr_ref) { + fprintf(tfp, "@ [%s", addr_ref->name); + while (NULL != (addr_ref = (Ref *) HTList_nextObject(cur2))) + fprintf(tfp, "; %s", addr_ref->name); + fprintf(tfp, "]\n"); + } else + fprintf(tfp, "@ ANYADDRESS\n"); + } +} + +static void print_item_list(ItemList *item_list) +{ + ItemList *cur = item_list; + Item *item; + + if (!item_list) + fprintf(tfp, "EMPTY"); + else + while (NULL != (item = (Item *) HTList_nextObject(cur))) + print_item(item); +} + +void HTAA_printGroupDef(GroupDef *group_def) +{ + if (!group_def) { + fprintf(tfp, "\nNULL RECORD\n"); + return; + } + + fprintf(tfp, "\nGroup %s:\n", + (group_def->group_name ? group_def->group_name : "NULL")); + + print_item_list(group_def->item_list); + fprintf(tfp, "\n"); +} + +#if 0 +static void print_group_def_list(GroupDefList *group_list) +{ + GroupDefList *cur = group_list; + GroupDef *group_def; + + while (NULL != (group_def = (GroupDef *) HTList_nextObject(cur))) + HTAA_printGroupDef(group_def); +} + +/* + * IP address template matching + */ + +/* static part_match() + * MATCH ONE PART OF INET ADDRESS AGAIST + * A PART OF MASK (inet address has 4 parts) + * ON ENTRY: + * tcur pointer to the beginning of template part. + * icur pointer to the beginning of actual inet + * number part. + * + * ON EXIT: + * returns YES, if match. + */ +static BOOL part_match(const char *tcur, + const char *icur) +{ + char required[4]; + char actual[4]; + const char *cur; + int cnt; + BOOL status; + + if (!tcur || !icur) + return NO; + + cur = tcur; + cnt = 0; + while (cnt < 3 && *cur && *cur != '.') + required[cnt++] = *(cur++); + required[cnt] = (char) 0; + + cur = icur; + cnt = 0; + while (cnt < 3 && *cur && *cur != '.') + actual[cnt++] = *(cur++); + actual[cnt] = (char) 0; + + status = HTAA_templateMatch(required, actual); + CTRACE((tfp, "part_match: req: '%s' act: '%s' match: %s\n", + required, actual, (status ? 
"yes" : "no"))); + + return status; +} + +/* static ip_number_match() + * MATCH INET NUMBER AGAINST AN INET NUMBER MASK + * ON ENTRY: + * template mask to match agaist, e.g., 128.141.*.* + * the_inet_addr actual inet address, e.g., 128.141.201.74 + * + * ON EXIT: + * returns YES, if match; NO, if not. + */ +static BOOL ip_number_match(const char *ctemplate, + const char *the_inet_addr) +{ + const char *tcur = ctemplate; + const char *icur = the_inet_addr; + int cnt; + + for (cnt = 0; cnt < 4; cnt++) { + if (!tcur || !icur || !part_match(tcur, icur)) + return NO; + if (NULL != (tcur = strchr(tcur, '.'))) + tcur++; + if (NULL != (icur = strchr(icur, '.'))) + icur++; + } + return YES; +} + +/* static is_domain_mask() + * DETERMINE IF A GIVEN MASK IS A + * DOMAIN NAME MASK OR AN INET NUMBER MASK + * ON ENTRY: + * mask either a domain name mask, + * e.g. + * *.cern.ch + * + * or an inet number mask, + * e.g. + * 128.141.*.* + * + * ON EXIT: + * returns YES, if mask is a domain name mask. + * NO, if it is an inet number mask. + */ +static BOOL is_domain_mask(const char *mask) +{ + const char *cur = mask; + + if (!mask) + return NO; + + while (*cur) { + if (*cur != '.' && *cur != '*' && (*cur < '0' || *cur > '9')) + return YES; /* Even one non-digit makes it a domain name mask */ + cur++; + } + return NO; /* All digits and dots, so it is an inet number mask */ +} + +/* static ip_mask_match() + * MATCH AN IP NUMBER MASK OR IP NAME MASK + * AGAINST ACTUAL IP NUMBER OR IP NAME + * + * ON ENTRY: + * mask mask. Mask may be either an inet number + * mask or a domain name mask, + * e.g. + * 128.141.*.* + * or + * *.cern.ch + * + * ip_number IP number of connecting host. + * ip_name IP name of the connecting host. + * + * ON EXIT: + * returns YES, if hostname/internet number + * matches the mask. + * NO, if no match (no fire). 
+ */ +static BOOL ip_mask_match(const char *mask, + const char *ip_number, + const char *ip_name) +{ + if (mask && (ip_number || ip_name)) { + if (is_domain_mask(mask)) { + if (HTAA_templateMatch(mask, ip_name)) + return YES; + } else { + if (ip_number_match(mask, ip_number)) + return YES; + } + } + return NO; +} + +static BOOL ip_in_def_list(AddressDefList *address_def_list, + char *ip_number, + char *ip_name) +{ + if (address_def_list && (ip_number || ip_name)) { + AddressDefList *cur = address_def_list; + Ref *ref; + + while (NULL != (ref = (Ref *) HTList_nextObject(cur))) { + /* Value of ref->translation is ignored, i.e., */ + /* no recursion for ip address tamplates. */ + if (ip_mask_match(ref->name, ip_number, ip_name)) + return YES; + } + } + return NO; +} + +/* + * Group file cached reading + */ + +typedef struct { + char *group_filename; + GroupDefList *group_list; +} GroupCache; + +typedef HTList GroupCacheList; + +static GroupCacheList *group_cache_list = NULL; + +GroupDefList *HTAA_readGroupFile(const char *filename) +{ + FILE *fp; + GroupCache *group_cache; + + if (isEmpty(filename)) + return NULL; + + if (!group_cache_list) + group_cache_list = HTList_new(); + else { + GroupCacheList *cur = group_cache_list; + + while (NULL != (group_cache = (GroupCache *) HTList_nextObject(cur))) { + if (!strcmp(filename, group_cache->group_filename)) { + CTRACE((tfp, "%s '%s' %s\n", + "HTAA_readGroupFile: group file", + filename, "already found in cache")); + return group_cache->group_list; + } /* if cache match */ + } /* while cached files remain */ + } /* cache exists */ + + CTRACE((tfp, "HTAA_readGroupFile: reading group file `%s'\n", + filename)); + + if (!(fp = fopen(filename, TXT_R))) { + CTRACE((tfp, "%s '%s'\n", + "HTAA_readGroupFile: unable to open group file", + filename)); + return NULL; + } + + if ((group_cache = typecalloc(GroupCache)) == 0) + outofmem(__FILE__, "HTAA_readGroupFile"); + + group_cache->group_filename = NULL; + 
StrAllocCopy(group_cache->group_filename, filename); + group_cache->group_list = parse_group_file(fp); + HTList_addObject(group_cache_list, (void *) group_cache); + fclose(fp); + + CTRACE((tfp, "Read group file '%s', results follow:\n", filename)); + if (TRACE) + print_group_def_list(group_cache->group_list); + + return group_cache->group_list; +} + +/* PUBLIC HTAA_userAndInetInGroup() + * CHECK IF USER BELONGS TO TO A GIVEN GROUP + * AND THAT THE CONNECTION COMES FROM AN + * ADDRESS THAT IS ALLOWED BY THAT GROUP + * ON ENTRY: + * group the group definition structure. + * username connecting user. + * ip_number browser host IP number, optional. + * ip_name browser host IP name, optional. + * However, one of ip_number or ip_name + * must be given. + * ON EXIT: + * returns HTAA_IP_MASK, if IP address mask was + * reason for failing. + * HTAA_NOT_MEMBER, if user does not belong + * to the group. + * HTAA_OK if both IP address and user are ok. + */ +HTAAFailReasonType HTAA_userAndInetInGroup(GroupDef *group, + char *username, + char *ip_number, + char *ip_name) +{ + HTAAFailReasonType reason = HTAA_NOT_MEMBER; + + if (group && username) { + ItemList *cur1 = group->item_list; + Item *item; + + while (NULL != (item = (Item *) HTList_nextObject(cur1))) { + if (!item->address_def_list || /* Any address allowed */ + ip_in_def_list(item->address_def_list, ip_number, ip_name)) { + + if (!item->user_def_list) /* Any user allowed */ + return HTAA_OK; + else { + UserDefList *cur2 = item->user_def_list; + Ref *ref; + + while (NULL != (ref = (Ref *) HTList_nextObject(cur2))) { + + if (ref->translation) { /* Group, check recursively */ + reason = HTAA_userAndInetInGroup(ref->translation, + username, + ip_number, ip_name); + if (reason == HTAA_OK) + return HTAA_OK; + } else { /* Username, check directly */ + if (username && *username && + 0 == strcmp(ref->name, username)) + return HTAA_OK; + } + /* Every user/group name in this group */ + } + /* search for username */ + } + /* IP 
address ok */ + } else { + reason = HTAA_IP_MASK; + } + } /* while items in group */ + } + /* valid parameters */ + return reason; /* No match, or invalid parameters */ +} + +void GroupDef_delete(GroupDef *group_def) +{ + if (group_def) { + FREE(group_def->group_name); + if (group_def->item_list) { + HTList_delete(group_def->item_list); /* @@@@ */ + group_def->item_list = NULL; + } + FREE(group_def); + } +} +#endif diff --git a/WWW/Library/Implementation/HTGroup.h b/WWW/Library/Implementation/HTGroup.h new file mode 100644 index 00000000..78745656 --- /dev/null +++ b/WWW/Library/Implementation/HTGroup.h @@ -0,0 +1,182 @@ +/* GROUP FILE ROUTINES + + */ + +#ifndef HTGROUP_H +#define HTGROUP_H + +#include <HTList.h> + +#ifdef __cplusplus +extern "C" { +#endif + typedef HTList GroupDefList; + typedef HTList ItemList; + + typedef struct { + char *group_name; + ItemList *item_list; + } GroupDef; + +/* + * Access Authorization failure reasons + */ + typedef enum { + HTAA_OK, /* 200 OK */ + HTAA_OK_GATEWAY, /* 200 OK, acting as a gateway */ + HTAA_NO_AUTH, /* 401 Unauthorized, not authenticated */ + HTAA_NOT_MEMBER, /* 401 Unauthorized, not authorized */ + HTAA_IP_MASK, /* 403 Forbidden by IP mask */ + HTAA_BY_RULE, /* 403 Forbidden by rule */ + HTAA_NO_ACL, /* 403 Forbidden, ACL non-existent */ + HTAA_NO_ENTRY, /* 403 Forbidden, no ACL entry */ + HTAA_SETUP_ERROR, /* 403 Forbidden, server setup error */ + HTAA_DOTDOT, /* 403 Forbidden, URL with /../ illegal */ + HTAA_HTBIN, /* 403 Forbidden, /htbin not enabled */ + HTAA_NOT_FOUND /* 404 Not found, or read protected */ + } HTAAFailReasonType; + +/* + +Group definition grammar + + string + "sequence of alphanumeric characters" + + user_name + string + + group_name + string + + group_ref + group_name + + user_def + user_name | group_ref + + user_def_list + user_def { ',' user_def } + + user_part + user_def | '(' user_def_list ')' + + templ + + "sequence of alphanumeric characters and '*'s" + + ip_number_mask + templ '.' 
templ '.' templ '.' templ + + domain_name_mask + templ { '.' templ } + + address + + ip_number_mask | domain_name_mask + + address_def + + address + + address_def_list + address_def { ',' address_def } + + address_part + address_def | '(' address_def_list ')' + + item + [user_part] ['@' address_part] + + item_list + item { ',' item } + + group_def + item_list + + group_decl + group_name ':' group_def + + PARSE GROUP DEFINITION + + */ + + extern GroupDef *HTAA_parseGroupDef(FILE *fp); + +/* + +Fill in Pointers to referenced Group Definitions in a Group Definition + + References to groups (by their name) are resolved from group_def_list and pointers to + those structures are added to group_def. + + */ + + extern void HTAA_resolveGroupReferences(GroupDef *group_def, + GroupDefList *group_def_list); + +/* + +Read Group File (and do caching) + + If group file is already in cache returns a pointer to previously read group definition + list. + + */ + + extern GroupDefList *HTAA_readGroupFile(const char *filename); + +/* + +Delete Group Definition + + Groups in cache should never be freed by this function. This should only be used to + free group definitions read by HTAA_parseGroupDef. + + */ + + extern void GroupDef_delete(GroupDef *group_def); + +/* + +Print Out Group Definition (for trace purposes) + + */ + + extern void HTAA_printGroupDef(GroupDef *group_def); + +/* + +Does a User Belong to a Given Set of Groups + + This function checks both the username and the internet address. + + */ + +/* PUBLIC HTAA_userAndInetInGroup() + * CHECK IF USER BELONGS TO TO A GIVEN GROUP + * AND THAT THE CONNECTION COMES FROM AN + * ADDRESS THAT IS ALLOWED BY THAT GROUP + * ON ENTRY: + * group the group definition structure. + * username connecting user. + * ip_number browser host IP number, optional. + * ip_name browser host IP name, optional. + * However, one of ip_number or ip_name + * must be given. 
+ * ON EXIT: + * returns HTAA_IP_MASK, if IP address mask was + * reason for failing. + * HTAA_NOT_MEMBER, if user does not belong + * to the group. + * HTAA_OK if both IP address and user are ok. + */ + extern HTAAFailReasonType HTAA_userAndInetInGroup(GroupDef *group, + char *username, + char *ip_number, + char *ip_name); + +#ifdef __cplusplus +} +#endif +#endif /* not HTGROUP_H */ diff --git a/WWW/Library/Implementation/HTInit.h b/WWW/Library/Implementation/HTInit.h new file mode 100644 index 00000000..4fc70003 --- /dev/null +++ b/WWW/Library/Implementation/HTInit.h @@ -0,0 +1,34 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTInit.html + INITIALISATION MODULE + + This module registers all the plug & play software modules which will be + used in the program. This is for a browser. + + To override this, just copy it and link in your version before you link with + the library. + + Implemented by HTInit.c by default. + + */ + +#ifndef HTINIT_H +#define HTINIT_H 1 + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + extern void HTFormatInit(void); + extern void HTPreparsedFormatInit(void); + extern void HTFileInit(void); + extern int LYTestMailcapCommand(const char *testcommand, const char *params); + extern BOOL LYMailcapUsesPctS(const char *controlstring); + extern char *LYMakeMailcapCommand(const char *command, const char *params, const char *filename); + +#ifdef __cplusplus +} +#endif +#endif /* HTINIT_H */ diff --git a/WWW/Library/Implementation/HTLex.c b/WWW/Library/Implementation/HTLex.c new file mode 100644 index 00000000..5a0df917 --- /dev/null +++ b/WWW/Library/Implementation/HTLex.c @@ -0,0 +1,142 @@ + +/* MODULE HTLex.c + * LEXICAL ANALYSOR + * + * AUTHORS: + * AL Ari Luotonen luotonen@dxcern.cern.ch + * + * HISTORY: + * + * + * BUGS: + * + * + */ + +#include <HTUtils.h> + +#include <HTLex.h> /* Implemented here */ + +#include <LYLeaks.h> + +/* + * Global variables + */ +char 
HTlex_buffer[40]; /* Read lexical string */ +int HTlex_line = 1; /* Line number in source file */ + +/* + * Module-wide variables + */ +static int lex_cnt; +static BOOL lex_template; +static LexItem lex_pushed_back = LEX_NONE; +static FILE *cache = NULL; + +void unlex(LexItem lex_item) +{ + lex_pushed_back = lex_item; +} + +LexItem lex(FILE *fp) +{ + int ch = 0; + + if (fp != cache) { /* This cache doesn't work ok because the system */ + cache = fp; /* often assign same FILE structure the next open */ + HTlex_line = 1; /* file. So, if there are syntax errors in setup * + files it may confuse things later on. */ + } + if (lex_pushed_back != LEX_NONE) { + LexItem ret = lex_pushed_back; + + lex_pushed_back = LEX_NONE; + return ret; + } + + lex_cnt = 0; + lex_template = NO; + + for (;;) { + switch (ch = getc(fp)) { + case EOF: + case ' ': + case '\t': + case '\r': + case '\n': + case ':': + case ',': + case '(': + case ')': + case '@': + if (lex_cnt > 0) { + if (ch != EOF) + ungetc(ch, fp); + if (lex_template) + return LEX_TMPL_STR; + else + return LEX_ALPH_STR; + } else + switch (ch) { + case EOF: + return LEX_EOF; + case '\n': + HTlex_line++; + return LEX_REC_SEP; + case ':': + return LEX_FIELD_SEP; + case ',': + return LEX_ITEM_SEP; + case '(': + return LEX_OPEN_PAREN; + case ')': + return LEX_CLOSE_PAREN; + case '@': + return LEX_AT_SIGN; + default: /* Leading white space ignored (SP,TAB,CR) */ + break; + } + break; + default: + if (lex_cnt < (int) (sizeof(HTlex_buffer) - 1)) + HTlex_buffer[lex_cnt++] = (char) ch; + HTlex_buffer[lex_cnt] = '\0'; + if ('*' == ch) + lex_template = YES; + } /* switch ch */ + } /* forever */ +} + +const char *lex_verbose(LexItem lex_item) +{ + static char msg[sizeof(HTlex_buffer) + 30]; /* @@@@@@@@ */ + + switch (lex_item) { + case LEX_NONE: /* Internally used */ + return "NO-LEX-ITEM"; + case LEX_EOF: /* End of file */ + return "end-of-file"; + case LEX_REC_SEP: /* Record separator */ + return "record separator (newline)"; + case 
LEX_FIELD_SEP: /* Field separator */ + return "field separator ':'"; + case LEX_ITEM_SEP: /* List item separator */ + return "item separator ','"; + case LEX_OPEN_PAREN: /* Group start tag */ + return "'('"; + case LEX_CLOSE_PAREN: /* Group end tag */ + return "')'"; + case LEX_AT_SIGN: /* Address qualifier */ + return "address qualifier '@'"; + case LEX_ALPH_STR: /* Alphanumeric string */ + sprintf(msg, "alphanumeric string '%.*s'", + (int) sizeof(HTlex_buffer), HTlex_buffer); + return msg; + case LEX_TMPL_STR: /* Template string */ + sprintf(msg, "template string '%.*s'", + (int) sizeof(HTlex_buffer), HTlex_buffer); + return msg; + default: + return "UNKNOWN-LEX-ITEM"; + } +} diff --git a/WWW/Library/Implementation/HTLex.h b/WWW/Library/Implementation/HTLex.h new file mode 100644 index 00000000..fde90348 --- /dev/null +++ b/WWW/Library/Implementation/HTLex.h @@ -0,0 +1,64 @@ +/* LEXICAL ANALYSOR (MAINLY FOR CONFIG FILES) + + */ + +#ifndef HTLEX_H +#define HTLEX_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + typedef enum { + LEX_NONE, /* Internally used */ + LEX_EOF, /* End of file */ + LEX_REC_SEP, /* Record separator */ + LEX_FIELD_SEP, /* Field separator */ + LEX_ITEM_SEP, /* List item separator */ + LEX_OPEN_PAREN, /* Group start tag */ + LEX_CLOSE_PAREN, /* Group end tag */ + LEX_AT_SIGN, /* Address qualifier */ + LEX_ALPH_STR, /* Alphanumeric string */ + LEX_TMPL_STR /* Template string */ + } LexItem; + + extern char HTlex_buffer[]; /* Read lexical string */ + extern int HTlex_line; /* Line number in source file */ + +/* + +Get Next Lexical Item + + If returns LEX_ALPH_STR or LEX_TMPL_STR the string is in global buffer lex_buffer. 
+ + */ + + extern LexItem lex(FILE *fp); + +/* + +Push Back Latest Item + + */ + + extern void unlex(LexItem lex_item); + +/* + +Get the Name for Lexical Item + + */ + + extern const char *lex_verbose(LexItem lex_item); + +/* + + */ + +#ifdef __cplusplus +} +#endif +#endif /* not HTLEX_H */ diff --git a/WWW/Library/Implementation/HTList.c b/WWW/Library/Implementation/HTList.c new file mode 100644 index 00000000..fee62404 --- /dev/null +++ b/WWW/Library/Implementation/HTList.c @@ -0,0 +1,408 @@ +/* + * $LynxId: HTList.c,v 1.18 2010/04/29 09:59:31 tom Exp $ + * + * A small List class HTList.c + * ================== + * + * A list is represented as a sequence of linked nodes of type HTList. + * The first node is a header which contains no object. + * New nodes are inserted between the header and the rest of the list. + */ + +#include <HTUtils.h> +#include <HTList.h> + +#include <LYLeaks.h> + +/* Create list. +*/ +HTList *HTList_new(void) +{ + HTList *newList; + + if ((newList = typeMalloc(HTList)) == NULL) + outofmem(__FILE__, "HTList_new"); + + assert(newList != NULL); + + newList->object = NULL; + newList->next = NULL; + + return newList; +} + +/* Delete list. +*/ +void HTList_delete(HTList *me) +{ + HTList *current; + + while ((current = me)) { + me = me->next; + FREE(current); + } + + return; +} + +/* Reverse order of elements in list. + */ +HTList *HTList_reverse(HTList *start) +{ + HTList *cur, *succ; + + if (!(start && start->next && (cur = start->next->next))) + return start; + start->next->next = NULL; + while (cur) { + succ = cur->next; + cur->next = start->next; + start->next = cur; + cur = succ; + } + return start; +} + +/* Append a list to another. + * + * If successful, the second list will become empty but not freed. 
+ */ +HTList *HTList_appendList(HTList *start, + HTList *tail) +{ + HTList *temp = start; + + if (!start) { + CTRACE((tfp, + "HTList: Trying to append list %p to a nonexisting list\n", + (void *) tail)); + return NULL; + } + if (!(tail && tail->next)) + return start; + + while (temp->next) + temp = temp->next; + + temp->next = tail->next; + tail->next = NULL; /* tail is now an empty list */ + return start; +} + +/* Link object to START of list (so it is pointed to by the head). + * + * Unlike HTList_addObject(), it does not malloc memory for HTList entry, + * it use already allocated memory which should not be free'd by any + * list operations (optimization). + */ +void HTList_linkObject(HTList *me, void *newObject, + HTList *newNode) +{ + if (me) { + if (newNode->object == NULL && newNode->next == NULL) { + /* It is safe: */ + newNode->object = newObject; + newNode->next = me->next; + me->next = newNode; + + } else { + /* + * This node is already linked to some list (probably this one), so + * refuse changing node pointers to keep the list valid!!! + */ + CTRACE((tfp, "*** HTList: Refuse linking already linked obj ")); + CTRACE((tfp, "%p, node %p, list %p\n", + (void *) newObject, (void *) newNode, (void *) me)); + } + + } else { + CTRACE((tfp, + "HTList: Trying to link object %p to a nonexisting list\n", + newObject)); + } + + return; +} + +/* Add object to START of list (so it is pointed to by the head). +*/ +void HTList_addObject(HTList *me, void *newObject) +{ + HTList *newNode; + + if (me) { + if ((newNode = typeMalloc(HTList)) == NULL) + outofmem(__FILE__, "HTList_addObject"); + + assert(newNode != NULL); + + newNode->object = newObject; + newNode->next = me->next; + me->next = newNode; + + } else { + CTRACE((tfp, "HTList: Trying to add object %p to a nonexisting list\n", + newObject)); + } + + return; +} + +/* Append object to END of list (furthest from the head). 
+*/ +void HTList_appendObject(HTList *me, void *newObject) +{ + HTList *temp = me; + + if (temp && newObject) { + while (temp->next) + temp = temp->next; + HTList_addObject(temp, newObject); + } + + return; +} + +/* Insert an object into the list at a specified position. + * If position is 0, this places the object at the head of the list + * and is equivalent to HTList_addObject(). + */ +void HTList_insertObjectAt(HTList *me, void *newObject, + int pos) +{ + HTList *newNode; + HTList *temp = me; + HTList *prevNode; + int Pos = pos; + + if (!temp) { + CTRACE((tfp, "HTList: Trying to add object %p to a nonexisting list\n", + newObject)); + return; + } + if (Pos < 0) { + Pos = 0; + CTRACE((tfp, "HTList: Treating negative object position %d as %d.\n", + pos, Pos)); + } + + prevNode = temp; + while ((temp = temp->next)) { + if (Pos == 0) { + if ((newNode = typeMalloc(HTList)) == NULL) + outofmem(__FILE__, "HTList_addObjectAt"); + + assert(newNode != NULL); + + newNode->object = newObject; + newNode->next = temp; + if (prevNode) + prevNode->next = newNode; + return; + } + prevNode = temp; + Pos--; + } + if (Pos >= 0) + HTList_addObject(prevNode, newObject); + + return; +} + +/* Unlink specified object from list. + * It does not free memory. + */ +BOOL HTList_unlinkObject(HTList *me, void *oldObject) +{ + HTList *temp = me; + HTList *prevNode; + + if (temp && oldObject) { + while (temp->next) { + prevNode = temp; + temp = temp->next; + if (temp->object == oldObject) { + prevNode->next = temp->next; + temp->next = NULL; + temp->object = NULL; + return YES; /* Success */ + } + } + } + return NO; /* object not found or NULL list */ +} + +/* Remove specified object from list. 
+*/ +BOOL HTList_removeObject(HTList *me, void *oldObject) +{ + HTList *temp = me; + HTList *prevNode; + + if (temp && oldObject) { + while (temp->next) { + prevNode = temp; + temp = temp->next; + if (temp->object == oldObject) { + prevNode->next = temp->next; + FREE(temp); + return YES; /* Success */ + } + } + } + return NO; /* object not found or NULL list */ +} + +/* Remove object at a given position in the list, where 0 is the + * object pointed to by the head (returns a pointer to the element + * (->object) for the object, and NULL if the list is empty, or + * if it doesn't exist - Yuk!). + */ +void *HTList_removeObjectAt(HTList *me, int position) +{ + HTList *temp = me; + HTList *prevNode; + int pos = position; + + if (!temp || pos < 0) + return NULL; + + prevNode = temp; + while ((temp = temp->next)) { + if (pos == 0) { + prevNode->next = temp->next; + prevNode = temp; + FREE(temp); + return prevNode->object; + } + prevNode = temp; + pos--; + } + + return NULL; /* Reached the end of the list */ +} + +/* Unlink object from START of list (the Last one inserted + * via HTList_linkObject(), and pointed to by the head). + * It does not free memory. + */ +void *HTList_unlinkLastObject(HTList *me) +{ + HTList *lastNode; + void *lastObject; + + if (me && me->next) { + lastNode = me->next; + lastObject = lastNode->object; + me->next = lastNode->next; + lastNode->next = NULL; + lastNode->object = NULL; + return lastObject; + + } else { /* Empty list */ + return NULL; + } +} + +/* Remove object from START of list (the Last one inserted + * via HTList_addObject(), and pointed to by the head). 
+ */ +void *HTList_removeLastObject(HTList *me) +{ + HTList *lastNode; + void *lastObject; + + if (me && me->next) { + lastNode = me->next; + lastObject = lastNode->object; + me->next = lastNode->next; + FREE(lastNode); + return lastObject; + + } else { /* Empty list */ + return NULL; + } +} + +/* Remove object from END of list (the First one inserted + * via HTList_addObject(), and furthest from the head). + */ +void *HTList_removeFirstObject(HTList *me) +{ + HTList *temp = me; + HTList *prevNode; + void *firstObject; + + if (!temp) + return NULL; + + prevNode = temp; + if (temp->next) { + while (temp->next) { + prevNode = temp; + temp = temp->next; + } + firstObject = temp->object; + prevNode->next = NULL; + FREE(temp); + return firstObject; + + } else { /* Empty list */ + return NULL; + } +} + +/* Determine total number of objects in the list, + * not counting the head. + */ +int HTList_count(HTList *me) +{ + HTList *temp = me; + int count = 0; + + if (temp) + while ((temp = temp->next)) + count++; + + return count; +} + +/* Determine position of an object in the list (a value of 0 + * means it is pointed to by the head; returns -1 if not found). + */ +int HTList_indexOf(HTList *me, void *object) +{ + HTList *temp = me; + int position = 0; + + if (temp) { + while ((temp = temp->next)) { + if (temp->object == object) + return position; + position++; + } + } + + return -1; /* Object not in the list */ +} + +/* Return pointer to the object at a specified position in the list, + * where 0 is the object pointed to by the head (returns NULL if + * the list is empty, or if it doesn't exist - Yuk!). 
+ */ +void *HTList_objectAt(HTList *me, int position) +{ + HTList *temp = me; + int pos = position; + + if (!temp || pos < 0) + return NULL; + + while ((temp = temp->next)) { + if (pos == 0) + return temp->object; + pos--; + } + + return NULL; /* Reached the end of the list */ +} diff --git a/WWW/Library/Implementation/HTList.h b/WWW/Library/Implementation/HTList.h new file mode 100644 index 00000000..93f91473 --- /dev/null +++ b/WWW/Library/Implementation/HTList.h @@ -0,0 +1,142 @@ + +/* List object + * + * The list object is a generic container for storing collections + * of things in order. + */ +#ifndef HTLIST_H +#define HTLIST_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + typedef struct _HTList HTList; + + struct _HTList { + void *object; + HTList *next; + }; + +/* Fast macro to traverse a list. Call it first with copy of the list + * header. It returns the first object and increments the passed list + * pointer. Call it with the same variable until it returns NULL. + */ +#define HTList_nextObject(me) \ + ((me) && ((me) = (me)->next) ? (me)->object : NULL) + +/* Macro to find object pointed to by the head (returns NULL + * if list is empty, OR if it doesn't exist - Yuk!) + */ +#define HTList_lastObject(me) \ + ((me) && (me)->next ? (me)->next->object : NULL) + +/* Macro to check if a list is empty (or doesn't exist - Yuk!) +*/ +#define HTList_isEmpty(me) ((me) ? ((me)->next == NULL) : YES) + +/* Create list. +*/ + extern HTList *HTList_new(void); + +/* Delete list. +*/ + extern void HTList_delete(HTList *me); + +/* Reverse a list. +*/ + extern HTList *HTList_reverse(HTList *start); + +/* Append two lists, making second list empty. +*/ + extern HTList *HTList_appendList(HTList *start, + HTList *tail); + +/* Add object to START of list (so it is pointed to by the head). +*/ + extern void HTList_addObject(HTList *me, + void *newObject); + +/* Append object to END of list (furthest from the head). 
+*/ + extern void HTList_appendObject(HTList *me, + void *newObject); + +/* Insert an object into the list at a specified position. + * If position is 0, this places the object at the head of the list + * and is equivalent to HTList_addObject(). + */ + extern void HTList_insertObjectAt(HTList *me, + void *newObject, + int pos); + +/* Remove specified object from list. +*/ + extern BOOL HTList_removeObject(HTList *me, + void *oldObject); + +/* Remove object at a given position in the list, where 0 is the + * object pointed to by the head (returns a pointer to the element + * (->object) for the object, and NULL if the list is empty, or + * if it doesn't exist - Yuk!). + */ + extern void *HTList_removeObjectAt(HTList *me, + int position); + +/* Remove object from START of list (the Last one inserted + * via HTList_addObject(), and pointed to by the head). + */ + extern void *HTList_removeLastObject(HTList *me); + +/* Remove object from END of list (the First one inserted + * via HTList_addObject(), and furthest from the head). + */ + extern void *HTList_removeFirstObject(HTList *me); + +/* Determine total number of objects in the list, + * not counting the head. + */ + extern int HTList_count(HTList *me); + +/* Determine position of an object in the list (a value of 0 + * means it is pointed to by the head; returns -1 if not found). + */ + extern int HTList_indexOf(HTList *me, + void *object); + +/* Return pointer to the object at a specified position in the list, + * where 0 is the object pointed to by the head (returns NULL if + * the list is empty, or if it doesn't exist - Yuk!). + */ + extern void *HTList_objectAt(HTList *me, + int position); + +/* Link object to START of list (so it is pointed to by the head). + * + * Unlike HTList_addObject(), it does not malloc memory for HTList entry, + * it use already allocated memory which should not be free'd by any + * list operations (optimization). 
+ */ + extern void HTList_linkObject(HTList *me, + void *newObject, + HTList *newNode); + +/* Unlink object from START of list (the Last one inserted + * via HTList_linkObject(), and pointed to by the head). + * It does not free memory. + */ + extern void *HTList_unlinkLastObject(HTList *me); + +/* Unlink specified object from list. + * It does not free memory. + */ + extern BOOL HTList_unlinkObject(HTList *me, + void *oldObject); + +#ifdef __cplusplus +} +#endif +#endif /* HTLIST_H */ diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c new file mode 100644 index 00000000..bdb86ec9 --- /dev/null +++ b/WWW/Library/Implementation/HTMIME.c @@ -0,0 +1,2601 @@ +/* + * $LynxId: HTMIME.c,v 1.76 2011/06/11 12:10:40 tom Exp $ + * + * MIME Message Parse HTMIME.c + * ================== + * + * This is RFC 1341-specific code. + * The input stream pushed into this parser is assumed to be + * stripped on CRs, ie lines end with LF, not CR LF. + * (It is easy to change this except for the body part where + * conversion can be slow.) + * + * History: + * Feb 92 Written Tim Berners-Lee, CERN + * + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> +#include <HTMIME.h> /* Implemented here */ +#include <HTTP.h> /* for redirecting_url */ +#include <HTAlert.h> +#include <HTFile.h> +#include <HTCJK.h> +#include <UCMap.h> +#include <UCDefs.h> +#include <UCAux.h> + +#include <LYCookie.h> +#include <LYCharSets.h> +#include <LYCharUtils.h> +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYLeaks.h> + +/* MIME Object + * ----------- + */ + +typedef enum { + MIME_TRANSPARENT, /* put straight through to target ASAP! 
*/ + /* states for "Transfer-Encoding: chunked" */ + MIME_CHUNKED, + mcCHUNKED_COUNT_DIGIT, + mcCHUNKED_COUNT_CR, + mcCHUNKED_COUNT_LF, + mcCHUNKED_EXTENSION, + mcCHUNKED_DATA, + mcCHUNKED_DATA_CR, + mcCHUNKED_DATA_LF, + /* character state-machine */ + miBEGINNING_OF_LINE, /* first character and not a continuation */ + miA, + miACCEPT_RANGES, + miAGE, + miAL, + miALLOW, + miALTERNATES, + miC, + miCACHE_CONTROL, + miCO, + miCOOKIE, + miCON, + miCONNECTION, + miCONTENT_, + miCONTENT_BASE, + miCONTENT_DISPOSITION, + miCONTENT_ENCODING, + miCONTENT_FEATURES, + miCONTENT_L, + miCONTENT_LANGUAGE, + miCONTENT_LENGTH, + miCONTENT_LOCATION, + miCONTENT_MD5, + miCONTENT_RANGE, + miCONTENT_T, + miCONTENT_TRANSFER_ENCODING, + miCONTENT_TYPE, + miDATE, + miE, + miETAG, + miEXPIRES, + miKEEP_ALIVE, + miL, + miLAST_MODIFIED, + miLINK, + miLOCATION, + miP, + miPR, + miPRAGMA, + miPROXY_AUTHENTICATE, + miPUBLIC, + miR, + miRE, + miREFRESH, + miRETRY_AFTER, + miS, + miSAFE, + miSE, + miSERVER, + miSET_COOKIE, + miSET_COOKIE1, + miSET_COOKIE2, + miT, + miTITLE, + miTRANSFER_ENCODING, + miU, + miUPGRADE, + miURI, + miV, + miVARY, + miVIA, + miW, + miWARNING, + miWWW_AUTHENTICATE, + miSKIP_GET_VALUE, /* Skip space then get value */ + miGET_VALUE, /* Get value till white space */ + miJUNK_LINE, /* Ignore the rest of this folded line */ + miNEWLINE, /* Just found a LF .. maybe continuation */ + miCHECK, /* check against check_pointer */ + MIME_NET_ASCII, /* Translate from net ascii */ + MIME_IGNORE /* Ignore entire file */ + /* TRANSPARENT and IGNORE are defined as stg else in _WINDOWS */ +} MIME_state; + +#define VALUE_SIZE 5120 /* @@@@@@@ Arbitrary? */ +struct _HTStream { + const HTStreamClass *isa; + + BOOL net_ascii; /* Is input net ascii? 
/*
 * Trim a pair of enclosing double-quotes from a header value, in place.
 *
 * The string is modified only when its first and its last character are
 * both double-quotes; embedded quotes are not examined.  A NULL or empty
 * string is left alone.  - FM
 */
void HTMIME_TrimDoubleQuotes(char *value)
{
    size_t length;

    if (value == NULL || *value != '"')
	return;

    length = strlen(value);
    if (value[length - 1] != '"')
	return;

    /* Cut off the closing quote ... */
    value[length - 1] = '\0';

    /*
     * ... then slide the remainder, including its terminator, one place
     * to the left over the opening quote.
     */
    memmove(value, value + 1, length - 1);
}
/*
 * Check if the token from "Content-Encoding" corresponds to a compression
 * type.
 *
 * The token is read from me->anchor->content_encoding; the outcome is also
 * written to the trace log.
 */
static BOOL content_is_compressed(HTStream *me)
{
    char *encoding = me->anchor->content_encoding;
    BOOL result = (BOOL) (HTEncodingToCompressType(encoding) != cftNone);

    CTRACE((tfp, "content is%s compressed\n", result ? "" : " NOT"));
    return result;
}

/*
 * Strip quotes from a refresh-URL.
 *
 * Works in place, and only when the string is longer than one character,
 * starts with a single-quote and ends with the same character.
 */
static void dequote(char *url)
{
    size_t len;

    len = strlen(url);
    if (*url == '\'' && len > 1 && url[len - 1] == url[0]) {
	/* Drop the closing quote, then shift everything left by one. */
	url[len - 1] = '\0';
	while ((url[0] = url[1]) != '\0') {
	    ++url;
	}
    }
}

/*
 * Strip off any compression-suffix from the address and check if the result
 * looks like one of the presentable suffixes.  If so, return the corresponding
 * MIME type.
 *
 * Returns 0 when the address has no '.' or when its last suffix does not
 * match (ignoring case) the suffix expected for the given method.
 */
static const char *UncompressedContentType(HTStream *me, CompressFileType method)
{
    const char *result = 0;
    char *address = me->anchor->address;
    const char *expected = HTCompressTypeToSuffix(method);
    const char *actual = strrchr(address, '.');

    /*
     * We have to ensure the suffix is consistent, to use HTFileFormat().
     */
    if (actual != 0 && !strcasecomp(actual, expected)) {
	HTFormat format;
	HTAtom *pencoding = 0;
	const char *description = 0;

	format = HTFileFormat(address, &pencoding, &description);
	result = HTAtom_name(format);
    }

    return result;
}

/*
 * Act on the header fields collected so far and choose the state used for
 * the data that follows.
 *
 * In order, this function:
 *  - reinterprets a compressed content-type as content-type + encoding
 *    when the address suffix allows it;
 *  - processes the parameters of an extended Content-Type, in particular
 *    "charset";
 *  - derives the anchor's no_cache flag from Expires/Date;
 *  - records the content type in the anchor and hands cookies to
 *    LYSetCookie();
 *  - picks up a redirection location if that was requested;
 *  - sets me->target and me->state, and for HTML with a Refresh URL writes
 *    a link line via me->isa->put_string.
 *
 * Always returns HT_OK.
 */
static int pumpData(HTStream *me)
{
    CompressFileType method;
    const char *new_encoding;
    const char *new_content;

    CTRACE((tfp, "Begin pumpData\n"));
    /*
     * If the content-type says it is compressed, and there is no
     * content-encoding, check further and see if the address (omitting the
     * suffix for a compressed type) looks like a type we can present.  If so,
     * rearrange things so we'll present the StreamStack code with the
     * presentable type, already marked as compressed.
     */
    CTRACE((tfp, "...address{%s}\n", me->anchor->address));
    method = HTContentTypeToCompressType(me->anchor->content_type_params);
    if ((method != cftNone)
	&& isEmpty(me->anchor->content_encoding)
	&& (new_content = UncompressedContentType(me, method)) != 0) {

	new_encoding = HTCompressTypeToEncoding(method);
	CTRACE((tfp, "reinterpreting as content-type:%s, encoding:%s\n",
		new_content, new_encoding));

	StrAllocCopy(me->anchor->content_encoding, new_encoding);
	FREE(me->compression_encoding);
	StrAllocCopy(me->compression_encoding, new_encoding);

	/* NOTE(review): unbounded copy into value[VALUE_SIZE]; presumably MIME type names are short -- confirm */
	strcpy(me->value, new_content);
	StrAllocCopy(me->anchor->content_type_params, me->value);
	me->format = HTAtom_for(me->value);
    }

    if (strchr(HTAtom_name(me->format), ';') != NULL) {
	char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4;

	CTRACE((tfp, "HTMIME: Extended MIME Content-Type is %s\n",
		HTAtom_name(me->format)));
	StrAllocCopy(cp, HTAtom_name(me->format));
	/*
	 * Note that the Content-Type value was converted
	 * to lower case when we loaded into me->format,
	 * but there may have been a mixed or upper-case
	 * atom, so we'll force lower-casing again.  We
	 * also stripped spaces and double-quotes, but
	 * we'll make sure they're still gone from any
	 * charset parameter we check.  - FM
	 */
	LYLowerCase(cp);
	/* cp1 marks the first ';'; writing '\0' there cuts cp down to the bare type. */
	if ((cp1 = strchr(cp, ';')) != NULL) {
	    BOOL chartrans_ok = NO;

	    if ((cp2 = strstr(cp1, "charset")) != NULL) {
		int chndl;

		/* Skip the word "charset" and any following ' ', '=' or '"'. */
		cp2 += 7;
		while (*cp2 == ' ' || *cp2 == '=' || *cp2 == '"')
		    cp2++;
		StrAllocCopy(cp3, cp2);		/* copy to mutilate more */
		for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&
				 *cp4 != ';' && *cp4 != ':' &&
				 !WHITE(*cp4)); cp4++) ;	/* do nothing */
		*cp4 = '\0';
		/* From here on cp4 is the isolated charset name. */
		cp4 = cp3;
		chndl = UCGetLYhndl_byMIME(cp3);
		if (UCCanTranslateFromTo(chndl,
					 current_char_set)) {
		    chartrans_ok = YES;
		    *cp1 = '\0';
		    me->format = HTAtom_for(cp);
		    StrAllocCopy(me->anchor->charset, cp4);
		    HTAnchor_setUCInfoStage(me->anchor, chndl,
					    UCT_STAGE_MIME,
					    UCT_SETBY_MIME);
		} else if (chndl < 0) {		/* got something but we don't
						   recognize it */
		    chndl = UCLYhndl_for_unrec;
		    if (chndl < 0)
			/*
			 * UCLYhndl_for_unrec not defined :-( fallback to
			 * UCLYhndl_for_unspec which always valid.
			 */
			chndl = UCLYhndl_for_unspec;	/* always >= 0 */
		    if (UCCanTranslateFromTo(chndl,
					     current_char_set)) {
			chartrans_ok = YES;
			*cp1 = '\0';
			me->format = HTAtom_for(cp);
			HTAnchor_setUCInfoStage(me->anchor, chndl,
						UCT_STAGE_MIME,
						UCT_SETBY_DEFAULT);
		    }
		} else {
		    /*
		     * Something like 'big5' - we cannot translate it, but
		     * the user may still be able to navigate the links.
		     */
		    *cp1 = '\0';
		    me->format = HTAtom_for(cp);
		    StrAllocCopy(me->anchor->charset, cp4);
		    HTAnchor_setUCInfoStage(me->anchor, chndl,
					    UCT_STAGE_MIME,
					    UCT_SETBY_MIME);
		}
		if (chartrans_ok) {
		    LYUCcharset *p_in =
		    HTAnchor_getUCInfoStage(me->anchor,
					    UCT_STAGE_MIME);
		    LYUCcharset *p_out =
		    HTAnchor_setUCInfoStage(me->anchor,
					    current_char_set,
					    UCT_STAGE_HTEXT,
					    UCT_SETBY_DEFAULT);

		    if (!p_out)
			/*
			 * Try again.
			 */
			p_out =
			    HTAnchor_getUCInfoStage(me->anchor,
						    UCT_STAGE_HTEXT);

		    /* NOTE(review): p_in and p_out are dereferenced below without a NULL check -- confirm the getters cannot return NULL here */
		    if (!strcmp(p_in->MIMEname,
				"x-transparent")) {
			HTPassEightBitRaw = TRUE;
			HTAnchor_setUCInfoStage(me->anchor,
						HTAnchor_getUCLYhndl(me->anchor,
								     UCT_STAGE_HTEXT),
						UCT_STAGE_MIME,
						UCT_SETBY_DEFAULT);
		    }
		    if (!strcmp(p_out->MIMEname,
				"x-transparent")) {
			HTPassEightBitRaw = TRUE;
			HTAnchor_setUCInfoStage(me->anchor,
						HTAnchor_getUCLYhndl(me->anchor,
								     UCT_STAGE_MIME),
						UCT_STAGE_HTEXT,
						UCT_SETBY_DEFAULT);
		    }
		    if ((p_in->enc != UCT_ENC_CJK)
#ifdef EXP_JAPANESEUTF8_SUPPORT
			&& ((p_in->enc != UCT_ENC_UTF8)
			    || (p_out->enc != UCT_ENC_CJK))
#endif
			) {
			HTCJK = NOCJK;
			if (!(p_in->codepoints &
			      UCT_CP_SUBSETOF_LAT1) &&
			    chndl == current_char_set) {
			    HTPassEightBitRaw = TRUE;
			}
		    } else if (p_out->enc == UCT_ENC_CJK) {
			Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
		    }
		} else {
		    /*
		     * Cannot translate.  If according to some heuristic the
		     * given charset and the current display character both are
		     * likely to be like ISO-8859 in structure, pretend we have
		     * some kind of match.
		     */
		    BOOL given_is_8859 =
		    (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) &&
			    isdigit(UCH(cp4[9])));
		    BOOL given_is_8859like =
		    (BOOL) (given_is_8859 ||
			    !StrNCmp(cp4, "windows-", 8) ||
			    !StrNCmp(cp4, "cp12", 4) ||
			    !StrNCmp(cp4, "cp-12", 5));
		    BOOL given_and_display_8859like =
		    (BOOL) (given_is_8859like &&
			    (strstr(LYchar_set_names[current_char_set],
				    "ISO-8859") ||
			     strstr(LYchar_set_names[current_char_set],
				    "windows-")));

		    if (given_and_display_8859like) {
			*cp1 = '\0';
			me->format = HTAtom_for(cp);
		    }
		    if (given_is_8859) {
			/* Cut the name after the digits that follow "iso-8859-"; cp1 is reused as a cursor into cp4. */
			cp1 = &cp4[10];
			while (*cp1 &&
			       isdigit(UCH(*cp1)))
			    cp1++;
			*cp1 = '\0';
		    }
		    if (given_and_display_8859like) {
			StrAllocCopy(me->anchor->charset, cp4);
			HTPassEightBitRaw = TRUE;
		    }
		    HTAlert(*cp4 ? cp4 : me->anchor->charset);
		}
		FREE(cp3);
	    } else {
		/*
		 * No charset parameter is present.  Ignore all other
		 * parameters, as we do when charset is present.  - FM
		 */
		*cp1 = '\0';
		me->format = HTAtom_for(cp);
	    }
	}
	FREE(cp);
    }
    /*
     * If we have an Expires header and haven't already set the no_cache
     * element for the anchor, check if we should set it based on that header.
     * - FM
     */
    if (me->anchor->no_cache == FALSE &&
	me->anchor->expires != NULL) {
	if (!strcmp(me->anchor->expires, "0")) {
	    /*
	     * The value is zero, which we treat as an absolute no-cache
	     * directive.  - FM
	     */
	    me->anchor->no_cache = TRUE;
	} else if (me->anchor->date != NULL) {
	    /*
	     * We have a Date header, so check if the value is less than or
	     * equal to that.  - FM
	     */
	    if (LYmktime(me->anchor->expires, TRUE) <=
		LYmktime(me->anchor->date, TRUE)) {
		me->anchor->no_cache = TRUE;
	    }
	} else if (LYmktime(me->anchor->expires, FALSE) == 0) {
	    /*
	     * We don't have a Date header, and the value is in past for us.  -
	     * FM
	     */
	    me->anchor->no_cache = TRUE;
	}
    }
    StrAllocCopy(me->anchor->content_type,
		 HTAtom_name(me->format));

    if (me->set_cookie != NULL || me->set_cookie2 != NULL) {
	LYSetCookie(me->set_cookie,
		    me->set_cookie2,
		    me->anchor->address);
	FREE(me->set_cookie);
	FREE(me->set_cookie2);
    }
    if (me->pickup_redirection) {
	if (me->location && *me->location) {
	    /* The location string is handed over to redirecting_url, not copied. */
	    redirecting_url = me->location;
	    me->location = NULL;
	    if (me->targetRep != WWW_DEBUG || me->sink)
		me->head_only = YES;

	} else {
	    permanent_redirection = FALSE;
	    if (me->location) {
		CTRACE((tfp, "HTTP: 'Location:' is zero-length!\n"));
		HTAlert(REDIRECTION_WITH_BAD_LOCATION);
	    }
	    CTRACE((tfp, "HTTP: Failed to pick up location.\n"));
	    if (me->location) {
		FREE(me->location);
	    } else {
		HTAlert(REDIRECTION_WITH_NO_LOCATION);
	    }
	}
    }
    CTRACE((tfp, "...pumpData finished reading header\n"));
    if (me->head_only) {
	/* We are done! - kw */
	me->state = MIME_IGNORE;
    } else {

	if (me->no_streamstack) {
	    me->target = me->sink;
	} else {
	    if (!me->compression_encoding) {
		CTRACE((tfp,
			"HTMIME: MIME Content-Type is '%s', converting to '%s'\n",
			HTAtom_name(me->format), HTAtom_name(me->targetRep)));
	    } else {
		/*
		 * Change the format to that for "www/compressed" and set up a
		 * stream to deal with it.  - FM
		 */
		CTRACE((tfp, "HTMIME: MIME Content-Type is '%s',\n",
			HTAtom_name(me->format)));
		me->format = HTAtom_for("www/compressed");
		CTRACE((tfp, " Treating as '%s'. Converting to '%s'\n",
			HTAtom_name(me->format), HTAtom_name(me->targetRep)));
		FREE(me->compression_encoding);
	    }
	    me->target = HTStreamStack(me->format, me->targetRep,
				       me->sink, me->anchor);
	    if (!me->target) {
		CTRACE((tfp, "HTMIME: Can't translate! ** \n"));
		me->target = me->sink;	/* Cheat */
	    }
	}
	if (me->target) {
	    me->targetClass = *me->target->isa;
	    /*
	     * Pump rest of data right through, according to the transfer encoding.
	     */
	    me->state = (me->chunked_encoding
			 ? MIME_CHUNKED
			 : MIME_TRANSPARENT);
	} else {
	    me->state = MIME_IGNORE;	/* What else to do? */
	}
	if (me->refresh_url != NULL && !content_is_compressed(me)) {
	    char *url = NULL;
	    char *num = NULL;
	    char *txt = NULL;
	    const char *base = "";	/* FIXME: refresh_url may be relative to doc */

	    LYParseRefreshURL(me->refresh_url, &num, &url);
	    if (url != NULL && me->format == WWW_HTML) {
		CTRACE((tfp,
			"Formatting refresh-url as first line of result\n"));
		HTSprintf0(&txt, gettext("Refresh: "));
		HTSprintf(&txt, gettext("%s seconds "), num);
		dequote(url);
		HTSprintf(&txt, "<a href=\"%s%s\">%s</a><br>", base, url, url);
		CTRACE((tfp, "URL %s%s\n", base, url));
		(me->isa->put_string) (me, txt);
		free(txt);
	    }
	    FREE(num);
	    FREE(url);
	}
    }
    CTRACE((tfp, "...end of pumpData\n"));
    return HT_OK;
}
- FM + */ + { + char *cp1, *cp0 = me->value; + + while ((cp1 = strstr(cp0, "no-cache")) != NULL) { + cp1 += 8; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (*cp1 == '\0' || *cp1 == ';') { + me->anchor->no_cache = TRUE; + break; + } + cp0 = cp1; + } + if (me->anchor->no_cache == TRUE) + break; + cp0 = me->value; + while ((cp1 = strstr(cp0, "max-age")) != NULL) { + cp1 += 7; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (*cp1 == '=') { + cp1++; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (isdigit(UCH(*cp1))) { + cp0 = cp1; + while (isdigit(UCH(*cp1))) + cp1++; + if (*cp0 == '0' && cp1 == (cp0 + 1)) { + me->anchor->no_cache = TRUE; + break; + } + } + } + cp0 = cp1; + } + } + break; + case miCOOKIE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Cookie: '%s'\n", + me->value)); + break; + case miCONNECTION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Connection: '%s'\n", + me->value)); + break; + case miCONTENT_BASE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Base: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_base, me->value); + break; + case miCONTENT_DISPOSITION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Disposition: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_disposition, me->value); + /* + * It's not clear yet from existing RFCs and IDs whether we should be + * looking for file;, attachment;, and/or inline; before the + * filename=value, so we'll just search for "filename" followed by '=' + * and just hope we get the intended value. It is purely a suggested + * name, anyway. 
- FM + */ + cp = me->anchor->content_disposition; + while (*cp != '\0' && strncasecomp(cp, "filename", 8)) + cp++; + if (*cp == '\0') + break; + cp += 8; + while ((*cp != '\0') && (WHITE(*cp) || *cp == '=')) + cp++; + if (*cp == '\0') + break; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '\0') + break; + StrAllocCopy(me->anchor->SugFname, cp); + if (*me->anchor->SugFname == '"') { + if ((cp = strchr((me->anchor->SugFname + 1), + '"')) != NULL) { + *(cp + 1) = '\0'; + HTMIME_TrimDoubleQuotes(me->anchor->SugFname); + } else { + FREE(me->anchor->SugFname); + break; + } + } + cp = me->anchor->SugFname; + while (*cp != '\0' && !WHITE(*cp)) + cp++; + *cp = '\0'; + if (*me->anchor->SugFname == '\0') + FREE(me->anchor->SugFname); + break; + case miCONTENT_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Encoding: '%s'\n", + me->value)); + if (!(me->value && *me->value) || + !strcasecomp(me->value, "identity")) + break; + /* + * Convert to lowercase and indicate in anchor. - FM + */ + LYLowerCase(me->value); + StrAllocCopy(me->anchor->content_encoding, me->value); + FREE(me->compression_encoding); + if (content_is_compressed(me)) { + /* + * Save it to use as a flag for setting up a "www/compressed" + * target. - FM + */ + StrAllocCopy(me->compression_encoding, me->value); + } else { + /* + * Some server indicated "8bit", "7bit" or "binary" + * inappropriately. We'll ignore it. - FM + */ + CTRACE((tfp, " Ignoring it!\n")); + } + break; + case miCONTENT_FEATURES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Features: '%s'\n", + me->value)); + break; + case miCONTENT_LANGUAGE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Language: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Convert to lowercase and indicate in anchor. 
- FM + */ + LYLowerCase(me->value); + StrAllocCopy(me->anchor->content_language, me->value); + break; + case miCONTENT_LENGTH: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Length: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Convert to integer and indicate in anchor. - FM + */ + me->anchor->content_length = atoi(me->value); + if (me->anchor->content_length < 0) + me->anchor->content_length = 0; + CTRACE((tfp, " Converted to integer: '%ld'\n", + me->anchor->content_length)); + break; + case miCONTENT_LOCATION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Location: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_location, me->value); + break; + case miCONTENT_MD5: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-MD5: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_md5, me->value); + break; + case miCONTENT_RANGE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Range: '%s'\n", + me->value)); + break; + case miCONTENT_TRANSFER_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Transfer-Encoding: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Force the Content-Transfer-Encoding value to all lower case. - FM + */ + LYLowerCase(me->value); + me->c_t_encoding = HTAtom_for(me->value); + break; + case miCONTENT_TYPE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Type: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Force the Content-Type value to all lower case and strip spaces and + * double-quotes. 
- FM + */ + for (i = 0, j = 0; me->value[i]; i++) { + if (me->value[i] != ' ' && me->value[i] != '"') { + me->value[j++] = (char) TOLOWER(me->value[i]); + } + } + me->value[j] = '\0'; + me->format = HTAtom_for(me->value); + StrAllocCopy(me->anchor->content_type_params, me->value); + break; + case miDATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Date: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->date, me->value); + break; + case miETAG: + /* Do not trim double quotes: an entity tag consists of an opaque + * quoted string, possibly prefixed by a weakness indicator. + */ + CTRACE((tfp, "HTMIME: PICKED UP ETag: %s\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->ETag, me->value); + break; + case miEXPIRES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Expires: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->expires, me->value); + break; + case miKEEP_ALIVE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Keep-Alive: '%s'\n", + me->value)); + break; + case miLAST_MODIFIED: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Last-Modified: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. 
- FM + */ + StrAllocCopy(me->anchor->last_modified, me->value); + break; + case miLINK: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Link: '%s'\n", + me->value)); + break; + case miLOCATION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Location: '%s'\n", + me->value)); + if (me->pickup_redirection && !me->location) { + StrAllocCopy(me->location, me->value); + } else { + CTRACE((tfp, "HTMIME: *** Ignoring Location!\n")); + } + break; + case miPRAGMA: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Pragma: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Check whether to set no_cache for the anchor. - FM + */ + if (!strcmp(me->value, "no-cache")) + me->anchor->no_cache = TRUE; + break; + case miPROXY_AUTHENTICATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Proxy-Authenticate: '%s'\n", + me->value)); + break; + case miPUBLIC: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Public: '%s'\n", + me->value)); + break; + case miREFRESH: /* nonstandard: Netscape */ + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Refresh: '%s'\n", + me->value)); + StrAllocCopy(me->refresh_url, me->value); + break; + case miRETRY_AFTER: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Retry-After: '%s'\n", + me->value)); + break; + case miSAFE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Safe: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor if "YES" or "TRUE". - FM + */ + if (!strcasecomp(me->value, "YES") || + !strcasecomp(me->value, "TRUE")) { + me->anchor->safe = TRUE; + } else if (!strcasecomp(me->value, "NO") || + !strcasecomp(me->value, "FALSE")) { + /* + * If server explicitly tells us that it has changed its mind, + * reset flag in anchor. 
- kw + */ + me->anchor->safe = FALSE; + } + break; + case miSERVER: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Server: '%s'\n", + me->value)); + if (!(me->value && *me->value)) + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->server, me->value); + break; + case miSET_COOKIE1: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Set-Cookie: '%s'\n", + me->value)); + if (me->set_cookie == NULL) { + StrAllocCopy(me->set_cookie, me->value); + } else { + StrAllocCat(me->set_cookie, ", "); + StrAllocCat(me->set_cookie, me->value); + } + break; + case miSET_COOKIE2: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Set-Cookie2: '%s'\n", + me->value)); + if (me->set_cookie2 == NULL) { + StrAllocCopy(me->set_cookie2, me->value); + } else { + StrAllocCat(me->set_cookie2, ", "); + StrAllocCat(me->set_cookie2, me->value); + } + break; + case miTITLE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Title: '%s'\n", + me->value)); + break; + case miTRANSFER_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Transfer-Encoding: '%s'\n", + me->value)); + if (!strcmp(me->value, "chunked")) + me->chunked_encoding = YES; + break; + case miUPGRADE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Upgrade: '%s'\n", + me->value)); + break; + case miURI: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP URI: '%s'\n", + me->value)); + break; + case miVARY: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Vary: '%s'\n", + me->value)); + break; + case miVIA: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Via: '%s'\n", + me->value)); + break; + case miWARNING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Warning: '%s'\n", + me->value)); + break; + case miWWW_AUTHENTICATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, 
"HTMIME: PICKED UP WWW-Authenticate: '%s'\n", + me->value)); + break; + default: /* Should never get here */ + return HT_ERROR; + } + return HT_OK; +} + +/*_________________________________________________________________________ + * + * A C T I O N R O U T I N E S + */ + +/* Character handling + * ------------------ + * + * This is a FSM parser. It ignores field names it does not understand. + * Folded header fields are recognized. Lines without a fieldname at + * the beginning (that are not folded continuation lines) are ignored + * as unknown field names. Fields with empty values are not picked up. + */ +static void HTMIME_put_character(HTStream *me, int c) +{ + /* MUST BE FAST */ + switch (me->state) { + begin_transparent: + case MIME_TRANSPARENT: + (*me->targetClass.put_character) (me->target, c); + return; + + /* RFC-2616 describes chunked transfer coding */ + case mcCHUNKED_DATA: + (*me->targetClass.put_character) (me->target, c); + me->chunked_size--; + if (me->chunked_size <= 0) + me->state = mcCHUNKED_DATA_CR; + return; + + case mcCHUNKED_DATA_CR: + me->state = mcCHUNKED_DATA_LF; + if (c == CR) { + return; + } + /* FALLTHRU */ + + case mcCHUNKED_DATA_LF: + me->state = MIME_CHUNKED; + if (c == LF) { + return; + } + + CTRACE((tfp, "HTIME_put_character expected LF in chunked data\n")); + me->state = MIME_TRANSPARENT; + goto begin_transparent; + + /* FALLTHRU */ + begin_chunked: + case MIME_CHUNKED: + me->chunked_size = 0; + me->state = mcCHUNKED_COUNT_DIGIT; + + /* FALLTHRU */ + case mcCHUNKED_COUNT_DIGIT: + if (isxdigit(UCH(c))) { + me->chunked_size <<= 4; + if (isdigit(UCH(c))) + me->chunked_size += UCH(c) - '0'; + else + me->chunked_size += TOUPPER(UCH(c)) - 'A' + 10; + return; + } + if (c == ';') + me->state = mcCHUNKED_EXTENSION; + + /* FALLTHRU */ + case mcCHUNKED_EXTENSION: + if (c != CR && c != LF) { + return; + } + me->state = mcCHUNKED_COUNT_CR; + + /* FALLTHRU */ + case mcCHUNKED_COUNT_CR: + me->state = mcCHUNKED_COUNT_LF; + if (c == CR) { + 
return; + } + + /* FALLTHRU */ + case mcCHUNKED_COUNT_LF: + me->state = ((me->chunked_size != 0) + ? mcCHUNKED_DATA + : MIME_CHUNKED); + if (c == LF) { + return; + } + goto begin_chunked; + + default: + break; + } + + /* + * This slightly simple conversion just strips CR and turns LF to newline. + * On unix LF is \n but on Mac \n is CR for example. See NetToText for an + * implementation which preserves single CR or LF. + */ + if (me->net_ascii) { + /* + * <sigh> This is evidence that at one time, this code supported + * local character sets other than ASCII. But there is so much + * code in HTTP.c that depends on line_buffer's having been + * translated to local character set that I needed to put the + * FROMASCII translation there, leaving this translation purely + * destructive. -- gil + */ + /* S/390 -- gil -- 0118 */ +#ifndef NOT_ASCII + c = FROMASCII(c); +#endif /* NOT_ASCII */ + if (c == CR) + return; + else if (c == LF) + c = '\n'; + } + + switch (me->state) { + + case MIME_IGNORE: + return; + + case MIME_TRANSPARENT: /* Not reached see above */ + case MIME_CHUNKED: + case mcCHUNKED_COUNT_DIGIT: + case mcCHUNKED_COUNT_CR: + case mcCHUNKED_COUNT_LF: + case mcCHUNKED_EXTENSION: + case mcCHUNKED_DATA: + case mcCHUNKED_DATA_CR: + case mcCHUNKED_DATA_LF: + return; + + case MIME_NET_ASCII: + (*me->targetClass.put_character) (me->target, c); /* MUST BE FAST */ + return; + + case miNEWLINE: + if (c != '\n' && WHITE(c)) { /* Folded line */ + me->state = me->fold_state; /* pop state before newline */ + if (me->state == miGET_VALUE && + me->value_pointer && me->value_pointer != me->value && + !WHITE(*(me->value_pointer - 1))) { + c = ' '; + goto GET_VALUE; /* will add space to value if it fits - kw */ + } + break; + } else if (me->fold_state == miGET_VALUE) { + /* Got a field, and now we know it's complete - so + * act on it. 
- kw */ + dispatchField(me); + } + /* FALLTHRU */ + + case miBEGINNING_OF_LINE: + me->net_ascii = YES; + switch (c) { + case 'a': + case 'A': + me->state = miA; + CTRACE((tfp, "HTMIME: Got 'A' at beginning of line, state now A\n")); + break; + + case 'c': + case 'C': + me->state = miC; + CTRACE((tfp, "HTMIME: Got 'C' at beginning of line, state now C\n")); + break; + + case 'd': + case 'D': + me->check_pointer = "ate:"; + me->if_ok = miDATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Got 'D' at beginning of line, checking for 'ate:'\n")); + break; + + case 'e': + case 'E': + me->state = miE; + CTRACE((tfp, "HTMIME: Got 'E' at beginning of line, state now E\n")); + break; + + case 'k': + case 'K': + me->check_pointer = "eep-alive:"; + me->if_ok = miKEEP_ALIVE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Got 'K' at beginning of line, checking for 'eep-alive:'\n")); + break; + + case 'l': + case 'L': + me->state = miL; + CTRACE((tfp, "HTMIME: Got 'L' at beginning of line, state now L\n")); + break; + + case 'p': + case 'P': + me->state = miP; + CTRACE((tfp, "HTMIME: Got 'P' at beginning of line, state now P\n")); + break; + + case 'r': + case 'R': + me->state = miR; + CTRACE((tfp, "HTMIME: Got 'R' at beginning of line, state now R\n")); + break; + + case 's': + case 'S': + me->state = miS; + CTRACE((tfp, "HTMIME: Got 'S' at beginning of line, state now S\n")); + break; + + case 't': + case 'T': + me->state = miT; + CTRACE((tfp, "HTMIME: Got 'T' at beginning of line, state now T\n")); + break; + + case 'u': + case 'U': + me->state = miU; + CTRACE((tfp, "HTMIME: Got 'U' at beginning of line, state now U\n")); + break; + + case 'v': + case 'V': + me->state = miV; + CTRACE((tfp, "HTMIME: Got 'V' at beginning of line, state now V\n")); + break; + + case 'w': + case 'W': + me->state = miW; + CTRACE((tfp, "HTMIME: Got 'W' at beginning of line, state now W\n")); + break; + + case '\n': /* Blank line: End of Header! 
*/ + { + me->net_ascii = NO; + pumpData(me); + } + break; + + default: + goto bad_field_name; + + } /* switch on character */ + break; + + case miA: /* Check for 'c','g' or 'l' */ + switch (c) { + case 'c': + case 'C': + me->check_pointer = "cept-ranges:"; + me->if_ok = miACCEPT_RANGES; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was A, found C, checking for 'cept-ranges:'\n")); + break; + + case 'g': + case 'G': + me->check_pointer = "e:"; + me->if_ok = miAGE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was A, found G, checking for 'e:'\n")); + break; + + case 'l': + case 'L': + me->state = miAL; + CTRACE((tfp, "HTMIME: Was A, found L, state now AL'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'g' or 'l'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miAL: /* Check for 'l' or 't' */ + switch (c) { + case 'l': + case 'L': + me->check_pointer = "ow:"; + me->if_ok = miALLOW; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was AL, found L, checking for 'ow:'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "ernates:"; + me->if_ok = miALTERNATES; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was AL, found T, checking for 'ernates:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'l' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miC: /* Check for 'a' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "che-control:"; + me->if_ok = miCACHE_CONTROL; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was C, found A, checking for 'che-control:'\n")); + break; + + case 'o': + case 'O': + me->state = miCO; + CTRACE((tfp, "HTMIME: Was C, found O, state now CO'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + 
case miCO: /* Check for 'n' or 'o' */ + switch (c) { + case 'n': + case 'N': + me->state = miCON; + CTRACE((tfp, "HTMIME: Was CO, found N, state now CON\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "kie:"; + me->if_ok = miCOOKIE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CO, found O, checking for 'kie:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'n' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCON: /* Check for 'n' or 't' */ + switch (c) { + case 'n': + case 'N': + me->check_pointer = "ection:"; + me->if_ok = miCONNECTION; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CON, found N, checking for 'ection:'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "ent-"; + me->if_ok = miCONTENT_; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CON, found T, checking for 'ent-'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'n' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miE: /* Check for 't' or 'x' */ + switch (c) { + case 't': + case 'T': + me->check_pointer = "ag:"; + me->if_ok = miETAG; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was E, found T, checking for 'ag:'\n")); + break; + + case 'x': + case 'X': + me->check_pointer = "pires:"; + me->if_ok = miEXPIRES; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was E, found X, checking for 'pires:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'t' or 'x'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miL: /* Check for 'a', 'i' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "st-modified:"; + me->if_ok = miLAST_MODIFIED; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was L, found A, checking for 'st-modified:'\n")); + break; + + case 'i': + case 'I': + 
me->check_pointer = "nk:"; + me->if_ok = miLINK; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was L, found I, checking for 'nk:'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "cation:"; + me->if_ok = miLOCATION; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was L, found O, checking for 'cation:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a', 'i' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miP: /* Check for 'r' or 'u' */ + switch (c) { + case 'r': + case 'R': + me->state = miPR; + CTRACE((tfp, "HTMIME: Was P, found R, state now PR'\n")); + break; + + case 'u': + case 'U': + me->check_pointer = "blic:"; + me->if_ok = miPUBLIC; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was P, found U, checking for 'blic:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'r' or 'u'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miPR: /* Check for 'a' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "gma:"; + me->if_ok = miPRAGMA; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was PR, found A, checking for 'gma'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "xy-authenticate:"; + me->if_ok = miPROXY_AUTHENTICATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was PR, found O, checking for 'xy-authenticate'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miR: /* Check for 'e' */ + switch (c) { + case 'e': + case 'E': + me->state = miRE; + CTRACE((tfp, "HTMIME: Was R, found E\n")); + break; + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'e'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miRE: /* Check for 'a' or 
'o' */ + switch (c) { + case 'f': + case 'F': /* nonstandard: Netscape */ + me->check_pointer = "resh:"; + me->if_ok = miREFRESH; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was RE, found F, checking for '%s'\n", me->check_pointer)); + break; + + case 't': + case 'T': + me->check_pointer = "ry-after:"; + me->if_ok = miRETRY_AFTER; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was RE, found T, checking for '%s'\n", me->check_pointer)); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'f' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miS: /* Check for 'a' or 'e' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "fe:"; + me->if_ok = miSAFE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was S, found A, checking for 'fe:'\n")); + break; + + case 'e': + case 'E': + me->state = miSE; + CTRACE((tfp, "HTMIME: Was S, found E, state now SE'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'e'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miSE: /* Check for 'r' or 't' */ + switch (c) { + case 'r': + case 'R': + me->check_pointer = "ver:"; + me->if_ok = miSERVER; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SE, found R, checking for 'ver'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "-cookie"; + me->if_ok = miSET_COOKIE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SE, found T, checking for '-cookie'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'r' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miSET_COOKIE: /* Check for ':' or '2' */ + switch (c) { + case ':': + me->field = miSET_COOKIE1; /* remember it */ + me->state = miSKIP_GET_VALUE; + CTRACE((tfp, "HTMIME: Was SET_COOKIE, found :, processing\n")); + break; + + case '2': + 
me->check_pointer = ":"; + me->if_ok = miSET_COOKIE2; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SET_COOKIE, found 2, checking for ':'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "':' or '2'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miT: /* Check for 'i' or 'r' */ + switch (c) { + case 'i': + case 'I': + me->check_pointer = "tle:"; + me->if_ok = miTITLE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was T, found I, checking for 'tle:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "ansfer-encoding:"; + me->if_ok = miTRANSFER_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was T, found R, checking for 'ansfer-encoding'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'i' or 'r'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miU: /* Check for 'p' or 'r' */ + switch (c) { + case 'p': + case 'P': + me->check_pointer = "grade:"; + me->if_ok = miUPGRADE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was U, found P, checking for 'grade:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "i:"; + me->if_ok = miURI; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was U, found R, checking for 'i:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'p' or 'r'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miV: /* Check for 'a' or 'i' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "ry:"; + me->if_ok = miVARY; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was V, found A, checking for 'ry:'\n")); + break; + + case 'i': + case 'I': + me->check_pointer = "a:"; + me->if_ok = miVIA; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was V, found I, checking for 'a:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where 
`%s' expected\n", + c, "'a' or 'i'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miW: /* Check for 'a' or 'w' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "rning:"; + me->if_ok = miWARNING; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was W, found A, checking for 'rning:'\n")); + break; + + case 'w': + case 'W': + me->check_pointer = "w-authenticate:"; + me->if_ok = miWWW_AUTHENTICATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was W, found W, checking for 'w-authenticate:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'w'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCHECK: /* Check against string */ + if (TOLOWER(c) == *(me->check_pointer)++) { + if (!*me->check_pointer) + me->state = me->if_ok; + } else { /* Error */ + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, me->check_pointer - 1)); + goto bad_field_name; + } + break; + + case miCONTENT_: + CTRACE((tfp, "HTMIME: in case CONTENT_\n")); + + switch (c) { + case 'b': + case 'B': + me->check_pointer = "ase:"; + me->if_ok = miCONTENT_BASE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found B, checking for 'ase:'\n")); + break; + + case 'd': + case 'D': + me->check_pointer = "isposition:"; + me->if_ok = miCONTENT_DISPOSITION; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found D, checking for 'isposition:'\n")); + break; + + case 'e': + case 'E': + me->check_pointer = "ncoding:"; + me->if_ok = miCONTENT_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found E, checking for 'ncoding:'\n")); + break; + + case 'f': + case 'F': + me->check_pointer = "eatures:"; + me->if_ok = miCONTENT_FEATURES; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found F, checking for 'eatures:'\n")); + break; + + case 'l': + case 'L': + me->state = miCONTENT_L; + CTRACE((tfp, 
+ "HTMIME: Was CONTENT_, found L, state now CONTENT_L\n")); + break; + + case 'm': + case 'M': + me->check_pointer = "d5:"; + me->if_ok = miCONTENT_MD5; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CONTENT_, found M, checking for 'd5:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "ange:"; + me->if_ok = miCONTENT_RANGE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found R, checking for 'ange:'\n")); + break; + + case 't': + case 'T': + me->state = miCONTENT_T; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found T, state now CONTENT_T\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCONTENT_L: + CTRACE((tfp, "HTMIME: in case CONTENT_L\n")); + + switch (c) { + case 'a': + case 'A': + me->check_pointer = "nguage:"; + me->if_ok = miCONTENT_LANGUAGE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found A, checking for 'nguage:'\n")); + break; + + case 'e': + case 'E': + me->check_pointer = "ngth:"; + me->if_ok = miCONTENT_LENGTH; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found E, checking for 'ngth:'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "cation:"; + me->if_ok = miCONTENT_LOCATION; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found O, checking for 'cation:'\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_L, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCONTENT_T: + CTRACE((tfp, "HTMIME: in case CONTENT_T\n")); + + switch (c) { + case 'r': + case 'R': + me->check_pointer = "ansfer-encoding:"; + me->if_ok = miCONTENT_TRANSFER_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_T, found R, checking for 'ansfer-encoding:'\n")); + break; + + case 'y': + case 'Y': + me->check_pointer = "pe:"; + me->if_ok = miCONTENT_TYPE; + me->state = miCHECK; + 
CTRACE((tfp, + "HTMIME: Was CONTENT_T, found Y, checking for 'pe:'\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_T, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miACCEPT_RANGES: + case miAGE: + case miALLOW: + case miALTERNATES: + case miCACHE_CONTROL: + case miCOOKIE: + case miCONNECTION: + case miCONTENT_BASE: + case miCONTENT_DISPOSITION: + case miCONTENT_ENCODING: + case miCONTENT_FEATURES: + case miCONTENT_LANGUAGE: + case miCONTENT_LENGTH: + case miCONTENT_LOCATION: + case miCONTENT_MD5: + case miCONTENT_RANGE: + case miCONTENT_TRANSFER_ENCODING: + case miCONTENT_TYPE: + case miDATE: + case miETAG: + case miEXPIRES: + case miKEEP_ALIVE: + case miLAST_MODIFIED: + case miLINK: + case miLOCATION: + case miPRAGMA: + case miPROXY_AUTHENTICATE: + case miPUBLIC: + case miREFRESH: + case miRETRY_AFTER: + case miSAFE: + case miSERVER: + case miSET_COOKIE1: + case miSET_COOKIE2: + case miTITLE: + case miTRANSFER_ENCODING: + case miUPGRADE: + case miURI: + case miVARY: + case miVIA: + case miWARNING: + case miWWW_AUTHENTICATE: + me->field = me->state; /* remember it */ + me->state = miSKIP_GET_VALUE; + /* Fall through! */ + + case miSKIP_GET_VALUE: + if (c == '\n') { + me->fold_state = me->state; + me->state = miNEWLINE; + break; + } + if (WHITE(c)) + /* + * Skip white space. 
+ */ + break; + + me->value_pointer = me->value; + me->state = miGET_VALUE; + /* Fall through to store first character */ + + case miGET_VALUE: + GET_VALUE: + if (c != '\n') { /* Not end of line */ + if (me->value_pointer < me->value + VALUE_SIZE - 1) { + *me->value_pointer++ = (char) c; + break; + } else { + goto value_too_long; + } + } + /* Fall through (if end of line) */ + + case miJUNK_LINE: + if (c == '\n') { + me->fold_state = me->state; + me->state = miNEWLINE; + } + break; + + } /* switch on state */ + +#ifdef EXP_HTTP_HEADERS + HTChunkPutc(&me->anchor->http_headers, UCH(c)); + if (me->state == MIME_TRANSPARENT) { + HTChunkTerminate(&me->anchor->http_headers); + CTRACE((tfp, "Server Headers:\n%.*s\n", + me->anchor->http_headers.size, + me->anchor->http_headers.data)); + CTRACE((tfp, "Server Content-Type:%s\n", + me->anchor->content_type_params)); + } +#endif + return; + + value_too_long: + CTRACE((tfp, "HTMIME: *** Syntax error. (string too long)\n")); + + bad_field_name: /* Ignore it */ + me->state = miJUNK_LINE; + +#ifdef EXP_HTTP_HEADERS + HTChunkPutc(&me->anchor->http_headers, UCH(c)); +#endif + + return; + +} + +/* String handling + * --------------- + * + * Strings must be smaller than this buffer size. + */ +static void HTMIME_put_string(HTStream *me, + const char *s) +{ + const char *p; + + if (me->state == MIME_TRANSPARENT) { /* Optimisation */ + (*me->targetClass.put_string) (me->target, s); + + } else if (me->state != MIME_IGNORE) { + CTRACE((tfp, "HTMIME: %s\n", s)); + + for (p = s; *p; p++) + HTMIME_put_character(me, *p); + } +} + +/* Buffer write. Buffers can (and should!) be big. 
+ * ------------ + */ +static void HTMIME_write(HTStream *me, + const char *s, + int l) +{ + const char *p; + + if (me->state == MIME_TRANSPARENT) { /* Optimisation */ + (*me->targetClass.put_block) (me->target, s, l); + + } else { + CTRACE((tfp, "HTMIME: %.*s\n", l, s)); + + for (p = s; p < s + l; p++) + HTMIME_put_character(me, *p); + } +} + +/* Free an HTML object + * ------------------- + * + */ +static void HTMIME_free(HTStream *me) +{ + if (me) { + FREE(me->location); + FREE(me->compression_encoding); + if (me->target) + (*me->targetClass._free) (me->target); + FREE(me); + } +} + +/* End writing +*/ +static void HTMIME_abort(HTStream *me, + HTError e) +{ + if (me) { + FREE(me->location); + FREE(me->compression_encoding); + if (me->target) + (*me->targetClass._abort) (me->target, e); + FREE(me); + } +} + +/* Structured Object Class + * ----------------------- + */ +static const HTStreamClass HTMIME = +{ + "MIMEParser", + HTMIME_free, + HTMIME_abort, + HTMIME_put_character, + HTMIME_put_string, + HTMIME_write +}; + +/* Subclass-specific Methods + * ------------------------- + */ +HTStream *HTMIMEConvert(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me; + + me = typecalloc(HTStream); + + if (me == NULL) + outofmem(__FILE__, "HTMIMEConvert"); + + assert(me != NULL); + + me->isa = &HTMIME; + me->sink = sink; + me->anchor = anchor; + me->anchor->safe = FALSE; + me->anchor->no_cache = FALSE; + FREE(me->anchor->cache_control); + FREE(me->anchor->SugFname); + FREE(me->anchor->charset); +#ifdef EXP_HTTP_HEADERS + HTChunkClear(&me->anchor->http_headers); + HTChunkInit(&me->anchor->http_headers, 128); +#endif + FREE(me->anchor->content_type_params); + FREE(me->anchor->content_language); + FREE(me->anchor->content_encoding); + FREE(me->anchor->content_base); + FREE(me->anchor->content_disposition); + FREE(me->anchor->content_location); + FREE(me->anchor->content_md5); + me->anchor->content_length = 0; + FREE(me->anchor->date); + 
FREE(me->anchor->expires); + FREE(me->anchor->last_modified); + FREE(me->anchor->ETag); + FREE(me->anchor->server); + me->target = NULL; + me->state = miBEGINNING_OF_LINE; + /* + * Sadly enough, change this to always default to WWW_HTML to parse all + * text as HTML for the users. + * GAB 06-30-94 + * Thanks to Robert Rowland robert@cyclops.pei.edu + * + * After discussion of the correct handline, should be application/octet- + * stream or unknown; causing servers to send a correct content type. + * + * The consequence of using WWW_UNKNOWN is that you end up downloading as a + * binary file what 99.9% of the time is an HTML file, which should have + * been rendered or displayed. So sadly enough, I'm changing it back to + * WWW_HTML, and it will handle the situation like Mosaic does, and as + * Robert Rowland suggested, because being functionally correct 99.9% of + * the time is better than being technically correct but functionally + * nonsensical. - FM + */ + /*** + me->format = WWW_UNKNOWN; + ***/ + me->format = WWW_HTML; + me->targetRep = pres->rep_out; + me->boundary = NULL; /* Not set yet */ + me->set_cookie = NULL; /* Not set yet */ + me->set_cookie2 = NULL; /* Not set yet */ + me->refresh_url = NULL; /* Not set yet */ + me->c_t_encoding = 0; /* Not set yet */ + me->compression_encoding = NULL; /* Not set yet */ + me->net_ascii = NO; /* Local character set */ + HTAnchor_setUCInfoStage(me->anchor, current_char_set, + UCT_STAGE_STRUCTURED, + UCT_SETBY_DEFAULT); + HTAnchor_setUCInfoStage(me->anchor, current_char_set, + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + return me; +} + +HTStream *HTNetMIME(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me = HTMIMEConvert(pres, anchor, sink); + + if (!me) + return NULL; + + me->net_ascii = YES; + return me; +} + +HTStream *HTMIMERedirect(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me = HTMIMEConvert(pres, anchor, sink); + + if (!me) + return NULL; + + 
me->pickup_redirection = YES; + if (me->targetRep == WWW_DEBUG && sink) + me->no_streamstack = YES; + return me; +} + +/* Japanese header handling functions + * ================================== + * + * K&Rized and added 07-Jun-96 by FM, based on: + * +//////////////////////////////////////////////////////////////////////// + * + * ISO-2022-JP handling routines + * & + * MIME decode routines (quick hack just for ISO-2022-JP) + * + * Thu Jan 25 10:11:42 JST 1996 + * + * Copyright (C) 1994, 1995, 1996 + * Shuichi Ichikawa (ichikawa@nuee.nagoya-u.ac.jp) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either versions 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with SKK, see the file COPYING. If not, write to the Free + * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * MIME decoding routines + * + * Written by S. Ichikawa, + * partially inspired by encdec.c of <jh@efd.lth.se>. + * Caller's buffers decode to no longer than the input strings. 
+ */ +#include <LYCharVals.h> /* S/390 -- gil -- 0163 */ + +static char HTmm64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; +static char HTmmquote[] = "0123456789ABCDEF"; +static int HTmmcont = 0; + +static void HTmmdec_base64(char **t, + char *s) +{ + int d, count, j, val; + char *buf, *bp, nw[4], *p; + + if ((buf = typeMallocn(char, strlen(s) * 3 + 1)) == 0) + outofmem(__FILE__, "HTmmdec_base64"); + + assert(buf != NULL); + + for (bp = buf; *s; s += 4) { + val = 0; + if (s[2] == '=') + count = 1; + else if (s[3] == '=') + count = 2; + else + count = 3; + + for (j = 0; j <= count; j++) { + if (!(p = strchr(HTmm64, s[j]))) { + return; + } + d = (int) (p - HTmm64); + d <<= (3 - j) * 6; + val += d; + } + for (j = 2; j >= 0; j--) { + nw[j] = (char) (val & 255); + val >>= 8; + } + if (count--) + *bp++ = nw[0]; + if (count--) + *bp++ = nw[1]; + if (count) + *bp++ = nw[2]; + } + *bp = '\0'; + StrAllocCopy(*t, buf); + FREE(buf); +} + +static void HTmmdec_quote(char **t, + char *s) +{ + char *buf, cval, *bp, *p; + + if ((buf = typeMallocn(char, strlen(s) + 1)) == 0) + outofmem(__FILE__, "HTmmdec_quote"); + + assert(buf != NULL); + + for (bp = buf; *s;) { + if (*s == '=') { + cval = 0; + if (s[1] && (p = strchr(HTmmquote, s[1]))) { + cval = (char) (cval + (char) (p - HTmmquote)); + } else { + *bp++ = *s++; + continue; + } + if (s[2] && (p = strchr(HTmmquote, s[2]))) { + cval = (char) (cval << 4); + cval = (char) (cval + (p - HTmmquote)); + *bp++ = cval; + s += 3; + } else { + *bp++ = *s++; + } + } else if (*s == '_') { + *bp++ = 0x20; + s++; + } else { + *bp++ = *s++; + } + } + *bp = '\0'; + StrAllocCopy(*t, buf); + FREE(buf); +} + +/* + * HTmmdecode for ISO-2022-JP - FM + */ +void HTmmdecode(char **target, + char *source) +{ + char *buf; + char *mmbuf = NULL; + char *m2buf = NULL; + char *s, *t, *u; + int base64, quote; + + if ((buf = typeMallocn(char, strlen(source) + 1)) == 0) + outofmem(__FILE__, "HTmmdecode"); + + assert(buf != NULL); + + 
for (s = source, u = buf; *s;) { + if (!strncasecomp(s, "=?ISO-2022-JP?B?", 16)) { + base64 = 1; + } else { + base64 = 0; + } + if (!strncasecomp(s, "=?ISO-2022-JP?Q?", 16)) { + quote = 1; + } else { + quote = 0; + } + if (base64 || quote) { + if (HTmmcont) { + for (t = s - 1; + t >= source && (*t == ' ' || *t == '\t'); t--) { + u--; + } + } + if (mmbuf == 0) /* allocate buffer big enough for source */ + StrAllocCopy(mmbuf, source); + for (s += 16, t = mmbuf; *s;) { + if (s[0] == '?' && s[1] == '=') { + break; + } else { + *t++ = *s++; + *t = '\0'; + } + } + if (s[0] != '?' || s[1] != '=') { + goto end; + } else { + s += 2; + *t = '\0'; + } + if (base64) + HTmmdec_base64(&m2buf, mmbuf); + if (quote) + HTmmdec_quote(&m2buf, mmbuf); + for (t = m2buf; *t;) + *u++ = *t++; + HTmmcont = 1; + } else { + if (*s != ' ' && *s != '\t') + HTmmcont = 0; + *u++ = *s++; + } + } + *u = '\0'; + end: + StrAllocCopy(*target, buf); + FREE(m2buf); + FREE(mmbuf); + FREE(buf); +} + +/* + * Insert ESC where it seems lost. + * (The author of this function "rjis" is S. Ichikawa.) + */ +int HTrjis(char **t, + char *s) +{ + char *p; + char *buf = NULL; + int kanji = 0; + + if (strchr(s, CH_ESC) || !strchr(s, '$')) { + if (s != *t) + StrAllocCopy(*t, s); + return 1; + } + + if ((buf = typeMallocn(char, strlen(s) * 2 + 1)) == 0) + outofmem(__FILE__, "HTrjis"); + + assert(buf != NULL); + + for (p = buf; *s;) { + if (!kanji && s[0] == '$' && (s[1] == '@' || s[1] == 'B')) { + if (HTmaybekanji((int) s[2], (int) s[3])) { + kanji = 1; + *p++ = CH_ESC; + *p++ = *s++; + *p++ = *s++; + *p++ = *s++; + *p++ = *s++; + continue; + } + *p++ = *s++; + continue; + } + if (kanji && s[0] == '(' && (s[1] == 'J' || s[1] == 'B')) { + kanji = 0; + *p++ = CH_ESC; + *p++ = *s++; + *p++ = *s++; + continue; + } + *p++ = *s++; + } + *p = *s; /* terminate string */ + + StrAllocCopy(*t, buf); + FREE(buf); + return 0; +} + +/* + * The following function "maybekanji" is derived from + * RJIS-1.0 by Mr. Hironobu Takahashi. 
/*
 * RJIS ( Recover JIS code from broken file )
 * Copyright (C) 1992 1994
 * Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either versions 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SKK, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Decide whether the byte pair (c1, c2) may be a two-byte kanji code.
 *
 * Returns 0 when c2 is outside 33..126, when c1 is below 33, between 41
 * and 47, or above 116, or when the pair falls into one of the excluded
 * column ranges listed per row below.  Returns 1 otherwise.
 *
 * Rows and columns are the byte values minus 32.
 */
int HTmaybekanji(int c1,
                 int c2)
{
    int row;
    int col;
    int excluded = 0;

    if (c2 < 33 || c2 > 126)
        return 0;
    if (c1 < 33 || (c1 > 40 && c1 < 48) || c1 > 116)
        return 0;

    row = c1 - 32;
    col = c2 - 32;

    switch (row) {
    case 2:
        excluded = (col > 14 && col < 26)
            || (col > 33 && col < 42)
            || (col > 48 && col < 60)
            || (col > 74 && col < 82)
            || (col > 89 && col < 94);
        break;
    case 3:
        excluded = (col < 16)
            || (col > 25 && col < 33)
            || (col > 58 && col < 65)
            || (col > 90);
        break;
    case 4:
        excluded = (col > 83);
        break;
    case 5:
        excluded = (col > 86);
        break;
    case 6:
        excluded = (col > 24 && col < 33) || (col > 56);
        break;
    case 7:
        excluded = (col > 33 && col < 49) || (col > 81);
        break;
    case 8:
        excluded = (col > 32);
        break;
    case 47:
        excluded = (col > 51);
        break;
    case 84:
        excluded = (col > 6);
        break;
    default:
        break;
    }

    return excluded ? 0 : 1;
}
a/WWW/Library/Implementation/HTMIME.h b/WWW/Library/Implementation/HTMIME.h new file mode 100644 index 00000000..6410c158 --- /dev/null +++ b/WWW/Library/Implementation/HTMIME.h @@ -0,0 +1,84 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTMIME.html + MIME PARSER + + The MIME parser stream presents a MIME document. It recursively invokes the + format manager to handle embedded formats. + + As well as stripping off and parsing the headers, the MIME parser has to + parse any weird MIME encodings it may meet within the body parts of + messages, and must deal with multipart messages. + + This module is implemented to the level necessary for operation with WWW, + but is not currently complete for any arbitrary MIME message. + + Check the source for latest additions to functionality. + + The MIME parser is complicated by the fact that WWW allows real binary to be + sent, not ASCII encoded. Therefore the netascii decoding is included in + this module. One cannot layer it by converting first from Net to local + text, then decoding it. Of course, for local files, the net ascii decoding + is not needed. There are therefore two creation routines. + + */ +#ifndef HTMIME_H +#define HTMIME_H + +#include <HTStream.h> +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + * This function is for trimming off any paired + * open- and close-double quotes from header values. + * It does not parse the string for embedded quotes, + * and will not modify the string unless both the + * first and last characters are double-quotes. 
- FM + */ extern void HTMIME_TrimDoubleQuotes(char *value); + +/* + + INPUT: LOCAL TEXT + + */ + extern HTStream *HTMIMEConvert(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +/* + + INPUT: NET ASCII + + */ + extern HTStream *HTNetMIME(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +/* + + INPUT: Redirection message, parse headers only for Location if present + + */ + extern HTStream *HTMIMERedirect(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +/* + + For handling Japanese headers. + +*/ + extern void HTmmdecode(char **target, + char *source); + + extern int HTrjis(char **target, + char *source); + + extern int HTmaybekanji(int c1, + int c2); + +#ifdef __cplusplus +} +#endif +#endif /* !HTMIME_H */ diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c new file mode 100644 index 00000000..833077f2 --- /dev/null +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -0,0 +1,334 @@ +/* + * $LynxId: HTMLDTD.c,v 1.57 2010/09/25 00:30:56 tom Exp $ + * + * Our Static DTD for HTML + * ----------------------- + */ + +/* Implements: +*/ + +#include <HTUtils.h> +#include <HTMLDTD.h> +#include <LYLeaks.h> +#include <LYJustify.h> + +/* + * Character entities like   now excluded from our DTD tables, they are + * mapped to Unicode and handled by chartrans code directly the similar way the + * numeric entities like { does. See src/chrtrans/entities.h for real + * mapping. + */ + +/* Entity Names + * ------------ + * + * This table must be matched exactly with ALL the translation tables + * (this is an obsolete translation mechanism, probably unused, + * currently replaced with Unicode chartrans in most cases...) 
+ */ +static const char *entities[] = +{ + "AElig", /* capital AE diphthong (ligature) */ + "Aacute", /* capital A, acute accent */ + "Acirc", /* capital A, circumflex accent */ + "Agrave", /* capital A, grave accent */ + "Aring", /* capital A, ring */ + "Atilde", /* capital A, tilde */ + "Auml", /* capital A, dieresis or umlaut mark */ + "Ccedil", /* capital C, cedilla */ + "Dstrok", /* capital Eth, Icelandic */ + "ETH", /* capital Eth, Icelandic */ + "Eacute", /* capital E, acute accent */ + "Ecirc", /* capital E, circumflex accent */ + "Egrave", /* capital E, grave accent */ + "Euml", /* capital E, dieresis or umlaut mark */ + "Iacute", /* capital I, acute accent */ + "Icirc", /* capital I, circumflex accent */ + "Igrave", /* capital I, grave accent */ + "Iuml", /* capital I, dieresis or umlaut mark */ + "Ntilde", /* capital N, tilde */ + "Oacute", /* capital O, acute accent */ + "Ocirc", /* capital O, circumflex accent */ + "Ograve", /* capital O, grave accent */ + "Oslash", /* capital O, slash */ + "Otilde", /* capital O, tilde */ + "Ouml", /* capital O, dieresis or umlaut mark */ + "THORN", /* capital THORN, Icelandic */ + "Uacute", /* capital U, acute accent */ + "Ucirc", /* capital U, circumflex accent */ + "Ugrave", /* capital U, grave accent */ + "Uuml", /* capital U, dieresis or umlaut mark */ + "Yacute", /* capital Y, acute accent */ + "aacute", /* small a, acute accent */ + "acirc", /* small a, circumflex accent */ + "acute", /* spacing acute */ + "aelig", /* small ae diphthong (ligature) */ + "agrave", /* small a, grave accent */ + "amp", /* ampersand */ + "aring", /* small a, ring */ + "atilde", /* small a, tilde */ + "auml", /* small a, dieresis or umlaut mark */ + "brkbar", /* broken vertical bar */ + "brvbar", /* broken vertical bar */ + "ccedil", /* small c, cedilla */ + "cedil", /* spacing cedilla */ + "cent", /* cent sign */ + "copy", /* copyright sign */ + "curren", /* currency sign */ + "deg", /* degree sign */ + "die", /* spacing dieresis */ 
+ "divide", /* division sign */ + "eacute", /* small e, acute accent */ + "ecirc", /* small e, circumflex accent */ + "egrave", /* small e, grave accent */ + "emdash", /* dash the width of emsp */ + "emsp", /* em space - not collapsed */ + "endash", /* dash the width of ensp */ + "ensp", /* en space - not collapsed */ + "eth", /* small eth, Icelandic */ + "euml", /* small e, dieresis or umlaut mark */ + "frac12", /* fraction 1/2 */ + "frac14", /* fraction 1/4 */ + "frac34", /* fraction 3/4 */ + "gt", /* greater than */ + "hibar", /* spacing macron */ + "iacute", /* small i, acute accent */ + "icirc", /* small i, circumflex accent */ + "iexcl", /* inverted exclamation mark */ + "igrave", /* small i, grave accent */ + "iquest", /* inverted question mark */ + "iuml", /* small i, dieresis or umlaut mark */ + "laquo", /* angle quotation mark, left */ + "lt", /* less than */ + "macr", /* spacing macron */ + "mdash", /* dash the width of emsp */ + "micro", /* micro sign */ + "middot", /* middle dot */ + "nbsp", /* non breaking space */ + "ndash", /* dash the width of ensp */ + "not", /* negation sign */ + "ntilde", /* small n, tilde */ + "oacute", /* small o, acute accent */ + "ocirc", /* small o, circumflex accent */ + "ograve", /* small o, grave accent */ + "ordf", /* feminine ordinal indicator */ + "ordm", /* masculine ordinal indicator */ + "oslash", /* small o, slash */ + "otilde", /* small o, tilde */ + "ouml", /* small o, dieresis or umlaut mark */ + "para", /* paragraph sign */ + "plusmn", /* plus-or-minus sign */ + "pound", /* pound sign */ + "quot", /* quote '"' */ + "raquo", /* angle quotation mark, right */ + "reg", /* circled R registered sign */ + "sect", /* section sign */ + "shy", /* soft hyphen */ + "sup1", /* superscript 1 */ + "sup2", /* superscript 2 */ + "sup3", /* superscript 3 */ + "szlig", /* small sharp s, German (sz ligature) */ + "thinsp", /* thin space (not collapsed) */ + "thorn", /* small thorn, Icelandic */ + "times", /* multiplication sign 
*/ + "trade", /* trade mark sign (U+2122) */ + "uacute", /* small u, acute accent */ + "ucirc", /* small u, circumflex accent */ + "ugrave", /* small u, grave accent */ + "uml", /* spacing dieresis */ + "uuml", /* small u, dieresis or umlaut mark */ + "yacute", /* small y, acute accent */ + "yen", /* yen sign */ + "yuml", /* small y, dieresis or umlaut mark */ +}; + +/* Attribute Lists + * --------------- + * + * Lists must be in alphabetical order by attribute name + * The tag elements contain the number of attributes + */ + +/* From Peter Flynn's intro to the HTML Pro DTD: + + %structure; + + DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ, + 2 1 2 2 1 8 8 8 8 8 8 8 8 4 4 + MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH + 8 n ?1 n 8 8 2 2 2 2 2 ?1 nE 4 4 + + %insertions; + + Elements which usually contain special-purpose material, or no text material at all. + + BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG, + 1 e? 2 2 l 1 e 2 l 8 4 4 E 1? E 1 E ! E ?1 E + IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area + 1 n 1 E n n n n n 8 E + + %text; + + Elements within the %structure; which directly contain running text. + + Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU, + 2 2 2 2 2 2 2 2 2 2 n 2 + AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN + 2 2 n 2 2 2 2 2 + Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB, + 1 1 1 1 1 1 ?1 n ?1nE? 1 E 1 1 1 1 l 1 1 E? + BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW + 1 n 1 n 1 n 1 n + Hypertext and graphics: A and?IMG + 8 ?8 E + Mathematical: SUB, SUP, and MATH + 4 4 4 l + Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB + 4 4 n 4 n 4 n + %formula; + */ + +/* Elements + * -------- + * + * Must match definitions in HTMLDTD.html! + * Must be in alphabetical order. 
+ * + * The T_* extra info is listed here, even though most fields are not used + * in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags). + * This simplifies comparison of the tags_table0[] table (otherwise unchanged + * from original Lynx treatment) with the tags_table1[] table below. - kw + * + * Name*, Attributes, No. of attributes, content, extra info... + */ + +#include <src0_HTMLDTD.h> +#include <src1_HTMLDTD.h> + +/* Dummy space, will be filled with the contents of either tags_table1 + or tags_table0 on calling HTSwitchDTD - kw */ + +static HTTag tags[HTML_ALL_ELEMENTS]; + +const SGML_dtd HTML_dtd = +{ + tags, + HTML_ELEMENTS, + entities, /* probably unused */ + TABLESIZE(entities), +}; + +/* This function fills the "tags" part of the HTML_dtd structure with + what we want to use, either tags_table0 or tags_table1. Note that it + has to be called at least once before HTML_dtd is used, otherwise + the HTML_dtd contents will be invalid! This could be coded in a way + that would make an initialisation call unnecessary, but my C knowledge + is limited and I didn't want to list the whole tags_table1 table + twice... - kw */ +void HTSwitchDTD(int new_flag) +{ + if (TRACE) + CTRACE((tfp, + "HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n", + new_flag ? "strict" : "tagsoup", + (int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)), + HTML_ALL_ELEMENTS, + (int) sizeof(HTTag))); + if (new_flag) + MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag)); + else + MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag)); +} + +HTTag HTTag_unrecognized = + +{NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_}; + +/* + * Utility Routine: Useful for people building HTML objects. + */ + +/* Start anchor element + * -------------------- + * + * It is kinda convenient to have a particulr routine for + * starting an anchor element, as everything else for HTML is + * simple anyway. 
+ */ +struct _HTStructured { + HTStructuredClass *isa; + /* ... */ +}; + +void HTStartAnchor(HTStructured * obj, const char *name, + const char *href) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = NO; + + if (name && *name) { + present[HTML_A_NAME] = YES; + value[HTML_A_NAME] = (const char *) name; + } + if (href) { + present[HTML_A_HREF] = YES; + value[HTML_A_HREF] = (const char *) href; + } + + (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0); +} + +void HTStartAnchor5(HTStructured * obj, const char *name, + const char *href, + const char *linktype, + int tag_charset) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = NO; + + if (name && *name) { + present[HTML_A_NAME] = YES; + value[HTML_A_NAME] = name; + } + if (href && *href) { + present[HTML_A_HREF] = YES; + value[HTML_A_HREF] = href; + } + if (linktype && *linktype) { + present[HTML_A_TYPE] = YES; + value[HTML_A_TYPE] = linktype; + } + + (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0); +} + +void HTStartIsIndex(HTStructured * obj, const char *prompt, + const char *href) +{ + BOOL present[HTML_ISINDEX_ATTRIBUTES]; + const char *value[HTML_ISINDEX_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) + present[i] = NO; + + if (prompt && *prompt) { + present[HTML_ISINDEX_PROMPT] = YES; + value[HTML_ISINDEX_PROMPT] = (const char *) prompt; + } + if (href) { + present[HTML_ISINDEX_HREF] = YES; + value[HTML_ISINDEX_HREF] = (const char *) href; + } + + (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0); +} diff --git a/WWW/Library/Implementation/HTMLDTD.h b/WWW/Library/Implementation/HTMLDTD.h new file mode 100644 index 00000000..10b0308f --- /dev/null +++ b/WWW/Library/Implementation/HTMLDTD.h @@ -0,0 +1,97 @@ +/* + * $LynxId: HTMLDTD.h,v 1.32 
2008/07/06 17:38:13 tom Exp $ + * + The HTML DTD -- software interface in libwww + HTML DTD - SOFTWARE INTERFACE + + SGML purists should excuse the use of the term "DTD" in this file to + represent DTD-related information which is not exactly a DTD itself. + + The C modular structure doesn't work very well here, as the dtd is + partly in the .h and partly in the .c which are not very independent. + Tant pis. + + */ +#ifndef HTMLDTD_H +#define HTMLDTD_H + +#include <SGML.h> +#include <HTFont.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Valid name chars for tag parsing. + */ +#define IsNmStart(c) (isalpha(UCH(c))) +#define IsNmChar(c) (isalnum(UCH(c)) || \ + c == '_' || c=='-' || c == '.' || c==':') +#define ReallyEmptyTagNum(e) ((HTML_dtd.tags[e].contents == SGML_EMPTY) && \ + !(HTML_dtd.tags[e].flags & Tgf_nreie)) +#define ReallyEmptyTag(t) ((t->contents == SGML_EMPTY) && \ + !(t->flags & Tgf_nreie)) + +#include <hdr_HTMLDTD.h> + +#ifdef USE_PRETTYSRC +/* values of HTML attributes' types */ +#define HTMLA_NORMAL 0 /* nothing specific */ +#define HTMLA_ANAME 1 /* anchor name - 'id' or a's 'name' */ +#define HTMLA_HREF 2 /* href */ +#define HTMLA_CLASS 4 /* class name. */ +#define HTMLA_AUXCLASS 8 /* attribute, the value of which also designates + a class name */ +#endif + extern const SGML_dtd HTML_dtd; + + extern void HTSwitchDTD(int new_flag); + + extern HTTag HTTag_unrecognized; + extern HTTag HTTag_mixedObject; + +/* + +Start anchor element + + It is kinda convenient to have a particular routine for starting an anchor + element, as everything else for HTML is simple anyway. + + ON ENTRY + + targetstream points to a structured stream object. + + name and href point to attribute strings or are NULL if the attribute is + to be omitted. 
+ + */ + extern void HTStartAnchor(HTStructured * targetstream, const char *name, + const char *href); + + extern void HTStartAnchor5(HTStructured * targetstream, const char *name, + const char *href, + const char *linktype, + int tag_charset); + +/* + +Start IsIndex element - FM + + It is kinda convenient to have a particular routine for starting an IsIndex + element with the prompt and/or href (action) attributes specified. + + ON ENTRY + + targetstream points to a structured stream object. + + prompt and href point to attribute strings or are NULL if the attribute is + to be omitted. + + */ + extern void HTStartIsIndex(HTStructured * targetstream, const char *prompt, + const char *href); + +#ifdef __cplusplus +} +#endif +#endif /* HTMLDTD_H */ diff --git a/WWW/Library/Implementation/HTMLGen.c b/WWW/Library/Implementation/HTMLGen.c new file mode 100644 index 00000000..3fc55e9b --- /dev/null +++ b/WWW/Library/Implementation/HTMLGen.c @@ -0,0 +1,740 @@ +/* + * $LynxId: HTMLGen.c,v 1.37 2012/02/10 18:32:26 tom Exp $ + * + * HTML Generator + * ============== + * + * This version of the HTML object sends HTML markup to the output stream. + * + * Bugs: Line wrapping is not done at all. + * All data handled as PCDATA. + * Should convert old XMP, LISTING and PLAINTEXT to PRE. + * + * It is not obvious to me right now whether the HEAD should be generated + * from the incoming data or the anchor. Currently it is from the former + * which is cleanest. 
+ */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> + +#define BUFFER_SIZE 200 /* Line buffer attempts to make neat breaks */ +#define MAX_CLEANNESS 20 + +/* Implements: +*/ +#include <HTMLGen.h> + +#include <HTMLDTD.h> +#include <HTStream.h> +#include <SGML.h> +#include <HTFormat.h> + +#ifdef USE_COLOR_STYLE +#include <LYCharUtils.h> +#include <AttrList.h> +#include <LYHash.h> +#include <LYStyle.h> +#endif + +#include <LYGlobalDefs.h> +#include <LYCurses.h> +#include <LYLeaks.h> + +#ifdef USE_COLOR_STYLE +char class_string[TEMPSTRINGSIZE]; + +static char *Style_className = NULL; +static char myHash[128]; +static int hcode; +#endif + +/* HTML Object + * ----------- + */ +struct _HTStream { + const HTStreamClass *isa; + HTStream *target; + HTStreamClass targetClass; /* COPY for speed */ +}; + +struct _HTStructured { + const HTStructuredClass *isa; + HTStream *target; + HTStreamClass targetClass; /* COPY for speed */ + + char buffer[BUFFER_SIZE + 1]; /* 1for NL */ + int buffer_maxchars; + char *write_pointer; + char *line_break[MAX_CLEANNESS + 1]; + int cleanness; + BOOL overflowed; + BOOL delete_line_break_char[MAX_CLEANNESS + 1]; + BOOL preformatted; + BOOL escape_specials; + BOOL in_attrval; +#ifdef USE_COLOR_STYLE + HText *text; +#endif +}; + +/* Flush Buffer + * ------------ + */ + +static void flush_breaks(HTStructured * me) +{ + int i; + + for (i = 0; i <= MAX_CLEANNESS; i++) { + me->line_break[i] = NULL; + } +} + +static void HTMLGen_flush(HTStructured * me) +{ + (*me->targetClass.put_block) (me->target, + me->buffer, + (int) (me->write_pointer - me->buffer)); + me->write_pointer = me->buffer; + flush_breaks(me); + me->cleanness = 0; + me->delete_line_break_char[0] = NO; +} + +#ifdef USE_COLOR_STYLE +/* + * We need to flush our buffer each time before we effect a color style change, + * this also relies on the subsequent stage not doing any buffering - this is + * currently true, in cases where it matters the target stream should be the + * HTPlain 
#ifdef USE_COLOR_STYLE
/*
 * We need to flush our buffer each time before we effect a color style change,
 * this also relies on the subsequent stage not doing any buffering - this is
 * currently true, in cases where it matters the target stream should be the
 * HTPlain converter.  The flushing currently prevents reasonable line breaking
 * in lines with tags.  Since color styles help visual scanning of displayed
 * source lines, and long lines are wrapped in GridText anyway, this is
 * probably acceptable (or even A Good Thing - more to see in one screenful).
 * The pointer to the HText structure is initialized here before we effect the
 * first style change.  Getting it from the global HTMainText variable isn't
 * very clean, since it relies on the fact that HText_new() has already been
 * called for the current stream stack's document by the time we start
 * processing the first element; we rely on HTMLGenerator's callers
 * (HTMLParsedPresent in particular) to guarantee this when it matters.
 * Normally the target stream will have been setup by HTPlainPresent, which
 * does what we need in this respect.  (A check whether we have the right
 * output stream could be done by checking that targetClass.name is
 * "PlainPresenter" or similar.)
 *
 * All special color style handling is only done if LYPreparsedSource is set.
 * We could always do it for displaying source generated by an internal
 * gateway, but this makes the rule more simple for the user:  color styles are
 * applied to html source only with the -preparsed flag.  - kw
 *
 * In short: pick up HTMainText on first use when LYPreparsedSource is set,
 * and flush the line buffer whenever an HText is attached.
 */
static void do_cstyle_flush(HTStructured * me)
{
    /* Lazily bind the HText, but only in preparsed-source mode. */
    if (!me->text && LYPreparsedSource) {
        me->text = HTMainText;
    }
    /* Without an HText there is no style change to synchronise with. */
    if (me->text) {
        HTMLGen_flush(me);
    }
}
#endif /* COLOR_STYLE */

/*	Weighted optional line break
 *
 *	We keep track of all the breaks for when we chop the line
 *
 *	Records the current write position as a break candidate of weight
 *	new_cleanness.  A non-zero dlbc means the character just written
 *	(the one before the write pointer) is the break character and is to
 *	be deleted when the line is broken there; this is cancelled when the
 *	buffer is still empty.  me->cleanness is raised to new_cleanness when
 *	that is at least the current level and the candidate is usable (not
 *	at the very start of the buffer, unless the line already overflowed).
 */

static void allow_break(HTStructured * me, int new_cleanness, int dlbc)
{
    /* Nothing written yet: there is no previous character to delete. */
    if (dlbc && me->write_pointer == me->buffer)
        dlbc = NO;
    me->line_break[new_cleanness] =
        dlbc ? me->write_pointer - 1    /* Point to space */
        : me->write_pointer;    /* point to gap */
    me->delete_line_break_char[new_cleanness] = (BOOLEAN) dlbc;
    if (new_cleanness >= me->cleanness &&
        (me->overflowed || me->line_break[new_cleanness] > me->buffer))
        me->cleanness = new_cleanness;
}

/*	Character handling
 *	------------------
 *
 *	The tricky bits are the line break handling.  This attempts
 *	to synchronise line breaks on sentence or phrase ends.  This
 *	is important if one stores SGML files in a line-oriented code
 *	repository, so that if a small change is made, line ends don't
 *	shift in a ripple-through to apparently change a large part of the
 *	file.  We give extra "cleanness" to spaces appearing directly
 *	after periods (full stops), [semi]colons and commas.
 *	This should make the source files easier to read and modify
 *	by hand, too, though this is not a primary design consideration. TBL
 *
 *	Steps, in order:
 *	 1. with escape_specials set, the three special characters tested
 *	    below are written as hexadecimal character references;
 *	 2. the character is stored in the line buffer;
 *	 3. a newline flushes the buffer and ends the call;
 *	 4. outside preformatted text, a blank or tab registers a break
 *	    candidate whose weight depends on the preceding character;
 *	 5. when the buffer has reached buffer_maxchars, or the line has
 *	    already overflowed and a break is available, the line is cut at
 *	    the best candidate (or dumped whole if there is none).
 */
static void HTMLGen_put_character(HTStructured * me, int c)
{
    if (me->escape_specials && UCH(c) < 32) {
        if (c == HT_NON_BREAK_SPACE || c == HT_EN_SPACE ||
            c == LY_SOFT_HYPHEN) {      /* recursion... */
            /* Emit "&#x", the hex digits, and finally ';' via c below. */
            HTMLGen_put_character(me, '&');
            HTMLGen_put_character(me, '#');
            HTMLGen_put_character(me, 'x');
            switch (c) {
            case HT_NON_BREAK_SPACE:    /* &#xA0; */
                HTMLGen_put_character(me, 'A');
                HTMLGen_put_character(me, '0');
                break;
            case HT_EN_SPACE:   /* &#x2002; */
                HTMLGen_put_character(me, '2');
                HTMLGen_put_character(me, '0');
                HTMLGen_put_character(me, '0');
                HTMLGen_put_character(me, '2');
                break;
            case LY_SOFT_HYPHEN:        /* &#xAD; */
                HTMLGen_put_character(me, 'A');
                HTMLGen_put_character(me, 'D');
                break;
            }
            c = ';';
        }
    }

    *me->write_pointer++ = (char) c;

    if (c == '\n') {
        HTMLGen_flush(me);
        return;
    }

    /* Figure out whether we can break at this point
     */
    if ((!me->preformatted && (c == ' ' || c == '\t'))) {
        int new_cleanness = 3;

        /* Only look back when something precedes the blank just stored. */
        if (me->write_pointer > (me->buffer + 1)) {
            char delims[5];
            char *p;

            strcpy(delims, ",;:.");     /* @@ english bias */
            /*
             * write_pointer[-2] is the character before the blank.  Its
             * position in delims ranks the break: ',' 6, ';' 7, ':' 8,
             * '.' 9.
             */
            p = strchr(delims, me->write_pointer[-2]);
            if (p)
                new_cleanness = (int) (p - delims + 6);
            /* Breaks outside attribute values are preferred. */
            if (!me->in_attrval)
                new_cleanness += 10;
        }
        allow_break(me, new_cleanness, YES);
    }

    /*
     * Flush buffer out when full, or whenever the line is over the nominal
     * maximum and we can break at all
     */
    if (me->write_pointer >= me->buffer + me->buffer_maxchars ||
        (me->overflowed && me->cleanness)) {
        if (me->cleanness) {
            char line_break_char = me->line_break[me->cleanness][0];
            char *saved = me->line_break[me->cleanness];

            /* `saved' becomes the first character of the next line. */
            if (me->delete_line_break_char[me->cleanness])
                saved++;
            /* Temporarily put a newline at the break and write the line. */
            me->line_break[me->cleanness][0] = '\n';
            (*me->targetClass.put_block) (me->target,
                                          me->buffer,
                                          (int) (me->line_break[me->cleanness] -
                                                 me->buffer + 1));
            me->line_break[me->cleanness][0] = line_break_char;
            {                   /* move next line in */
                char *p = saved;
                char *q;

                for (q = me->buffer; p < me->write_pointer;)
                    *q++ = *p++;
            }
            me->cleanness = 0;
            /* Now we have to check whether there are any perfectly good breaks
             * which weren't good enough for the last line but may be good
             * enough for the next
             */
            {
                int i;

                for (i = 0; i <= MAX_CLEANNESS; i++) {
                    if (me->line_break[i] != NULL &&
                        me->line_break[i] > saved) {
                        /* Still in the kept text: shift it like the text. */
                        me->line_break[i] = me->line_break[i] -
                            (saved - me->buffer);
                        me->cleanness = i;
                    } else {
                        me->line_break[i] = NULL;
                    }
                }
            }

            me->delete_line_break_char[0] = 0;
            me->write_pointer = me->write_pointer - (saved - me->buffer);
            me->overflowed = NO;

        } else {
            /* No break available: write the full buffer without a newline. */
            (*me->targetClass.put_block) (me->target,
                                          me->buffer,
                                          me->buffer_maxchars);
            me->write_pointer = me->buffer;
            flush_breaks(me);
            me->overflowed = YES;
        }
    }
}

/*	String handling
 *	---------------
 *
 *	Feeds every character of the NUL-terminated string s to
 *	HTMLGen_put_character().
 */
static void HTMLGen_put_string(HTStructured * me, const char *s)
{
    const char *p;

    for (p = s; *p; p++)
        HTMLGen_put_character(me, *p);
}

/*	Block handling
 *	--------------
 *
 *	Feeds the l characters starting at s to HTMLGen_put_character().
 */
static void HTMLGen_write(HTStructured * me, const char *s,
                          int l)
{
    const char *p;

    for (p = s; p < (s + l); p++)
        HTMLGen_put_character(me, *p);
}
- kw + */ + HTSprintf(&Style_className, ";%s", HTML_dtd.tags[element_number].name); + strcpy(myHash, HTML_dtd.tags[element_number].name); + if (class_string[0]) { + int len = (int) strlen(myHash); + + sprintf(myHash + len, ".%.*s", (int) sizeof(myHash) - len - 2, class_string); + HTSprintf(&Style_className, ".%s", class_string); + } + class_string[0] = '\0'; + strtolower(myHash); + hcode = hash_code(myHash); + strtolower(Style_className); + + if (TRACE_STYLE) { + fprintf(tfp, "CSSTRIM:%s -> %d", myHash, hcode); + if (hashStyles[hcode].code != hcode) { + char *rp = strrchr(myHash, '.'); + + fprintf(tfp, " (undefined) %s\n", myHash); + if (rp) { + int hcd; + + *rp = '\0'; /* trim the class */ + hcd = hash_code(myHash); + fprintf(tfp, "CSS:%s -> %d", myHash, hcd); + if (hashStyles[hcd].code != hcd) + fprintf(tfp, " (undefined) %s\n", myHash); + else + fprintf(tfp, " ca=%d\n", hashStyles[hcd].color); + } + } else + fprintf(tfp, " ca=%d\n", hashStyles[hcode].color); + } + + if (displayStyles[element_number + STARTAT].color > -2) { + CTRACE2(TRACE_STYLE, + (tfp, "CSSTRIM: start_element: top <%s>\n", + HTML_dtd.tags[element_number].name)); + do_cstyle_flush(me); + HText_characterStyle(me->text, hcode, 1); + } + } +#endif /* USE_COLOR_STYLE */ + me->preformatted = YES; /* free text within tags */ + HTMLGen_put_character(me, '<'); + HTMLGen_put_string(me, tag->name); + if (present) { + BOOL had_attr = NO; + + for (i = 0; i < tag->number_of_attributes; i++) { + if (present[i]) { + had_attr = YES; + HTMLGen_put_character(me, ' '); + allow_break(me, 11, YES); +#ifdef USE_COLOR_STYLE + /* + * Try to mimic HTML_start_element's special handling for + * HTML_LINK. If applicable, color the displayed attribute / + * value pairs differently. 
- kw + */ + if (LYPreparsedSource && + element_number == HTML_LINK && !title && + present[HTML_LINK_CLASS] && *value[HTML_LINK_CLASS] && + !present[HTML_LINK_REV] && + (present[HTML_LINK_REL] || present[HTML_LINK_HREF])) { + if (present[HTML_LINK_TITLE] && *value[HTML_LINK_TITLE]) { + StrAllocCopy(title, value[HTML_LINK_TITLE]); + LYTrimHead(title); + LYTrimTail(title); + } + if ((!title || *title == '\0') && present[HTML_LINK_REL]) { + StrAllocCopy(title, value[HTML_LINK_REL]); + } + if (title && *title) { + HTSprintf0(&title_tmp, "link.%s.%s", + value[HTML_LINK_CLASS], title); + CTRACE2(TRACE_STYLE, + (tfp, "CSSTRIM:link=%s\n", title_tmp)); + + do_cstyle_flush(me); + HText_characterStyle(me->text, hash_code(title_tmp), 1); + } + } +#endif + HTMLGen_put_string(me, tag->attributes[i].name); + if (value[i]) { + me->preformatted = was_preformatted; + me->in_attrval = YES; + if (strchr(value[i], '"') == NULL) { + HTMLGen_put_string(me, "=\""); + HTMLGen_put_string(me, value[i]); + HTMLGen_put_character(me, '"'); + } else if (strchr(value[i], '\'') == NULL) { + HTMLGen_put_string(me, "='"); + HTMLGen_put_string(me, value[i]); + HTMLGen_put_character(me, '\''); + } else { /* attribute value has both kinds of quotes */ + const char *p; + + HTMLGen_put_string(me, "=\""); + for (p = value[i]; *p; p++) { + if (*p != '"') { + HTMLGen_put_character(me, *p); + } else { + HTMLGen_put_string(me, """); + } + } + HTMLGen_put_character(me, '"'); + } + me->preformatted = YES; + me->in_attrval = NO; + } + } + } +#ifdef USE_COLOR_STYLE + if (had_attr && LYPreparsedSource && element_number == HTML_LINK) { + /* + * Clean up after special HTML_LINK handling - kw + */ + if (title && *title) { + do_cstyle_flush(me); + HText_characterStyle(me->text, hash_code(title_tmp), 0); + FREE(title_tmp); + } + FREE(title); + } +#endif + if (had_attr) + allow_break(me, 12, NO); + } + HTMLGen_put_string(me, ">"); /* got rid of \n LJM */ + + /* + * Make very specific HTML assumption that PRE can't be 
nested! + */ + me->preformatted = (BOOL) ((element_number == HTML_PRE) + ? YES + : was_preformatted); + + /* + * Can break after element start. + */ + if (!me->preformatted && tag->contents != SGML_EMPTY) { + if (HTML_dtd.tags[element_number].contents == SGML_ELEMENT) + allow_break(me, 15, NO); + else + allow_break(me, 2, NO); + } +#if defined(USE_COLOR_STYLE) + /* + * Same logic as in HTML_start_element, copied from there. - kw + */ + + /* end really empty tags straight away */ + if (LYPreparsedSource && ReallyEmptyTagNum(element_number)) { + CTRACE2(TRACE_STYLE, + (tfp, "STYLE:begin_element:ending EMPTY element style\n")); + do_cstyle_flush(me); + HText_characterStyle(me->text, hcode, STACK_OFF); + TrimColorClass(HTML_dtd.tags[element_number].name, + Style_className, &hcode); + } +#endif /* USE_COLOR_STYLE */ + if (element_number == HTML_OBJECT && tag->contents == SGML_LITTERAL) { + /* + * These conditions only approximate the ones used in HTML.c. Let our + * SGML parser know that further content is to be parsed normally not + * literally. - kw + */ + if (!present) { + return HT_PARSER_OTHER_CONTENT; + } else if (!present[HTML_OBJECT_DECLARE] && + !(present[HTML_OBJECT_NAME] && + value[HTML_OBJECT_NAME] && *value[HTML_OBJECT_NAME])) { + if (present[HTML_OBJECT_SHAPES] || + !(present[HTML_OBJECT_USEMAP] && + value[HTML_OBJECT_USEMAP] && *value[HTML_OBJECT_USEMAP])) + return HT_PARSER_OTHER_CONTENT; + } + } + return HT_OK; +} + +/* End Element + * ----------- + * + * When we end an element, the style must be returned to that in effect before + * that element. Note that anchors (etc?) don't have an associated style, so + * that we must scan down the stack for an element with a defined style. (In + * fact, the styles should be linked to the whole stack not just the top one.) 
+ * TBL 921119 + */ +static int HTMLGen_end_element(HTStructured * me, int element_number, + char **insert GCC_UNUSED) +{ + if (!me->preformatted && + HTML_dtd.tags[element_number].contents != SGML_EMPTY) { + /* + * Can break before element end. + */ + if (HTML_dtd.tags[element_number].contents == SGML_ELEMENT) + allow_break(me, 14, NO); + else + allow_break(me, 1, NO); + } + HTMLGen_put_string(me, "</"); + HTMLGen_put_string(me, HTML_dtd.tags[element_number].name); + HTMLGen_put_character(me, '>'); + if (element_number == HTML_PRE) { + me->preformatted = NO; + } +#ifdef USE_COLOR_STYLE + /* + * Same logic as in HTML_end_element, copied from there. - kw + */ + TrimColorClass(HTML_dtd.tags[element_number].name, + Style_className, &hcode); + + if (LYPreparsedSource && !ReallyEmptyTagNum(element_number)) { + CTRACE2(TRACE_STYLE, + (tfp, "STYLE:end_element: ending non-EMPTY style\n")); + do_cstyle_flush(me); + HText_characterStyle(me->text, hcode, STACK_OFF); + } +#endif /* USE_COLOR_STYLE */ + return HT_OK; +} + +/* Expanding entities + * ------------------ + * + */ +static int HTMLGen_put_entity(HTStructured * me, int entity_number) +{ + int nent = (int) HTML_dtd.number_of_entities; + + HTMLGen_put_character(me, '&'); + if (entity_number < nent) { + HTMLGen_put_string(me, HTML_dtd.entity_names[entity_number]); + } + HTMLGen_put_character(me, ';'); + return HT_OK; +} + +/* Free an HTML object + * ------------------- + * + */ +static void HTMLGen_free(HTStructured * me) +{ + (*me->targetClass.put_character) (me->target, '\n'); + HTMLGen_flush(me); + (*me->targetClass._free) (me->target); /* ripple through */ +#ifdef USE_COLOR_STYLE + FREE(Style_className); +#endif + FREE(me); +} + +static void PlainToHTML_free(HTStructured * me) +{ + HTMLGen_end_element(me, HTML_PRE, 0); + HTMLGen_free(me); +} + +static void HTMLGen_abort(HTStructured * me, HTError e GCC_UNUSED) +{ + HTMLGen_free(me); +#ifdef USE_COLOR_STYLE + FREE(Style_className); +#endif +} + +static void 
PlainToHTML_abort(HTStructured * me, HTError e GCC_UNUSED) +{ + PlainToHTML_free(me); +} + +/* Structured Object Class + * ----------------------- + */ +static const HTStructuredClass HTMLGeneration = /* As opposed to print etc */ +{ + "HTMLGen", + HTMLGen_free, + HTMLGen_abort, + HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write, + HTMLGen_start_element, HTMLGen_end_element, + HTMLGen_put_entity +}; + +/* Subclass-specific Methods + * ------------------------- + */ +HTStructured *HTMLGenerator(HTStream *output) +{ + HTStructured *me = (HTStructured *) malloc(sizeof(*me)); + + if (me == NULL) + outofmem(__FILE__, "HTMLGenerator"); + + assert(me != NULL); + + me->isa = &HTMLGeneration; + + me->target = output; + me->targetClass = *me->target->isa; /* Copy pointers to routines for speed */ + + me->write_pointer = me->buffer; + flush_breaks(me); + me->line_break[0] = me->buffer; + me->cleanness = 0; + me->overflowed = NO; + me->delete_line_break_char[0] = NO; + me->preformatted = NO; + me->in_attrval = NO; + + /* + * For what line length should we attempt to wrap ? - kw + */ + if (!LYPreparsedSource) { + me->buffer_maxchars = 80; /* work as before - kw */ + } else if (dump_output_width > 1) { + me->buffer_maxchars = dump_output_width; /* try to honor -width - kw */ + } else if (dump_output_immediately) { + me->buffer_maxchars = 80; /* try to honor -width - kw */ + } else { + me->buffer_maxchars = (LYcolLimit - 1); + if (me->buffer_maxchars < 38) /* too narrow, let GridText deal */ + me->buffer_maxchars = 40; + } + if (me->buffer_maxchars > 900) /* likely not true - kw */ + me->buffer_maxchars = 78; + if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */ + me->buffer_maxchars = BUFFER_SIZE - 2; + + /* + * If dump_output_immediately is set, there likely isn't anything after + * this stream to interpret the Lynx special chars. Also if they get + * displayed via HTPlain, that will probably make non-breaking space chars + * etc. invisible. 
So let's translate them to numerical character + * references. For debugging purposes we'll use the new hex format. + */ + me->escape_specials = LYPreparsedSource; + +#ifdef USE_COLOR_STYLE + me->text = NULL; /* Will be initialized when first needed. - kw */ + FREE(Style_className); + class_string[0] = '\0'; +#endif /* COLOR_STYLE */ + + return me; +} + +/* Stream Object Class + * ------------------- + * + * This object just converts a plain text stream into HTML + * It is officially a structured strem but only the stream bits exist. + * This is just the easiest way of typecasting all the routines. + */ +static const HTStructuredClass PlainToHTMLConversion = +{ + "plaintexttoHTML", + HTMLGen_free, + PlainToHTML_abort, + HTMLGen_put_character, + HTMLGen_put_string, + HTMLGen_write, + NULL, /* Structured stuff */ + NULL, + NULL +}; + +/* HTConverter from plain text to HTML Stream + * ------------------------------------------ + */ +HTStream *HTPlainToHTML(HTPresentation *pres GCC_UNUSED, + HTParentAnchor *anchor GCC_UNUSED, + HTStream *sink) +{ + HTStructured *me = (HTStructured *) malloc(sizeof(*me)); + + if (me == NULL) + outofmem(__FILE__, "PlainToHTML"); + + assert(me != NULL); + + me->isa = (const HTStructuredClass *) &PlainToHTMLConversion; + + /* + * Copy pointers to routines for speed. + */ + me->target = sink; + me->targetClass = *me->target->isa; + me->write_pointer = me->buffer; + flush_breaks(me); + me->cleanness = 0; + me->overflowed = NO; + me->delete_line_break_char[0] = NO; + /* try to honor -width - kw */ + me->buffer_maxchars = (dump_output_width > 1 ? 
+ dump_output_width : 80); + + HTMLGen_put_string(me, "<HTML>\n<BODY>\n<PRE>\n"); + me->preformatted = YES; + me->escape_specials = NO; + me->in_attrval = NO; + return (HTStream *) me; +} diff --git a/WWW/Library/Implementation/HTMLGen.h b/WWW/Library/Implementation/HTMLGen.h new file mode 100644 index 00000000..f2db3d40 --- /dev/null +++ b/WWW/Library/Implementation/HTMLGen.h @@ -0,0 +1,26 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTMLGen.html + HTML GENERATOR + + This module converts structed stream into stream. That is, given a stream + to write to, it will give you a structured stream to + + */ +#ifndef HTMLGEN_H +#define HTMLGEN_H + +#include <HTML.h> +#include <HTStream.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern HTStructured *HTMLGenerator(HTStream *output); + + extern HTStream *HTPlainToHTML(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +#ifdef __cplusplus +} +#endif +#endif /* HTMLGEN_H */ diff --git a/WWW/Library/Implementation/HTNews.c b/WWW/Library/Implementation/HTNews.c new file mode 100644 index 00000000..b4ea9291 --- /dev/null +++ b/WWW/Library/Implementation/HTNews.c @@ -0,0 +1,3127 @@ +/* + * $LynxId: HTNews.c,v 1.69 2011/06/11 12:10:55 tom Exp $ + * + * NEWS ACCESS HTNews.c + * =========== + * + * History: + * 26 Sep 90 Written TBL + * 29 Nov 91 Downgraded to C, for portable implementation. 
+ */ + +#include <HTUtils.h> /* Coding convention macros */ + +#ifndef DISABLE_NEWS + +/* Implements: +*/ +#include <HTNews.h> + +#include <HTCJK.h> +#include <HTMIME.h> +#include <HTFont.h> +#include <HTFormat.h> +#include <HTTCP.h> +#include <LYUtils.h> +#include <LYStrings.h> + +#define NEWS_PORT 119 /* See rfc977 */ +#define SNEWS_PORT 563 /* See Lou Montulli */ +#define APPEND /* Use append methods */ +int HTNewsChunkSize = 30; /* Number of articles for quick display */ +int HTNewsMaxChunk = 40; /* Largest number of articles in one window */ + +#ifndef DEFAULT_NEWS_HOST +#define DEFAULT_NEWS_HOST "news" +#endif /* DEFAULT_NEWS_HOST */ + +#ifndef NEWS_SERVER_FILE +#define NEWS_SERVER_FILE "/usr/local/lib/rn/server" +#endif /* NEWS_SERVER_FILE */ + +#ifndef NEWS_AUTH_FILE +#define NEWS_AUTH_FILE ".newsauth" +#endif /* NEWS_AUTH_FILE */ + +#ifdef USE_SSL +static SSL *Handle = NULL; +static int channel_s = 1; + +#define NEWS_NETWRITE(sock, buff, size) \ + (Handle ? SSL_write(Handle, buff, size) : NETWRITE(sock, buff, size)) +#define NEWS_NETCLOSE(sock) \ + { (void)NETCLOSE(sock); if (Handle) { SSL_free(Handle); Handle = NULL; } } +static int HTNewsGetCharacter(void); + +#define NEXT_CHAR HTNewsGetCharacter() +#else +#define NEWS_NETWRITE NETWRITE +#define NEWS_NETCLOSE NETCLOSE +#define NEXT_CHAR HTGetCharacter() +#endif /* USE_SSL */ + +#include <HTML.h> +#include <HTAccess.h> +#include <HTParse.h> +#include <HTFormat.h> +#include <HTAlert.h> + +#include <LYNews.h> +#include <LYGlobalDefs.h> +#include <LYLeaks.h> + +#define SnipIn(d,fmt,len,s) sprintf(d, fmt, (int)sizeof(d)-len, s) +#define SnipIn2(d,fmt,tag,len,s) sprintf(d, fmt, tag, (int)sizeof(d)-len, s) + +struct _HTStructured { + const HTStructuredClass *isa; + /* ... */ +}; + +#define LINE_LENGTH 512 /* Maximum length of line of ARTICLE etc */ +#define GROUP_NAME_LENGTH 256 /* Maximum length of group name */ + +/* + * Module-wide variables. 
+ */ +char *HTNewsHost = NULL; /* Default host */ +static char *NewsHost = NULL; /* Current host */ +static char *NewsHREF = NULL; /* Current HREF prefix */ +static int s; /* Socket for NewsHost */ +static int HTCanPost = FALSE; /* Current POST permission */ +static char response_text[LINE_LENGTH + 1]; /* Last response */ + +static HTStructured *target; /* The output sink */ +static HTStructuredClass targetClass; /* Copy of fn addresses */ +static HTStream *rawtarget = NULL; /* The output sink for rawtext */ +static HTStreamClass rawtargetClass; /* Copy of fn addresses */ +static int diagnostic; /* level: 0=none 2=source */ +static BOOL rawtext = NO; /* Flag: HEAD or -mime_headers */ +static HTList *NNTP_AuthInfo = NULL; /* AUTHINFO database */ +static char *name = NULL; +static char *address = NULL; +static char *dbuf = NULL; /* dynamic buffer for long messages etc. */ + +#define PUTC(c) (*targetClass.put_character)(target, c) +#define PUTS(s) (*targetClass.put_string)(target, s) +#define RAW_PUTS(s) (*rawtargetClass.put_string)(rawtarget, s) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) +#define END(e) (*targetClass.end_element)(target, e, 0) +#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ + (*targetClass.end_element)(target, e, 0) +#define FREE_TARGET if (rawtext) (*rawtargetClass._free)(rawtarget); \ + else (*targetClass._free)(target) +#define ABORT_TARGET if (rawtext) (*rawtargetClass._abort)(rawtarget, NULL); \ + else (*targetClass._abort)(target, NULL) + +typedef struct _NNTPAuth { + char *host; + char *user; + char *pass; +} NNTPAuth; + +#ifdef LY_FIND_LEAKS +static void free_news_globals(void) +{ + if (s >= 0) { + NEWS_NETCLOSE(s); + s = -1; + } + FREE(HTNewsHost); + FREE(NewsHost); + FREE(NewsHREF); + FREE(name); + FREE(address); + FREE(dbuf); +} +#endif /* LY_FIND_LEAKS */ + +static void free_NNTP_AuthInfo(void) +{ + HTList *cur = NNTP_AuthInfo; + NNTPAuth *auth = NULL; + + if (!cur) + return; + + while 
(NULL != (auth = (NNTPAuth *) HTList_nextObject(cur))) { + FREE(auth->host); + FREE(auth->user); + FREE(auth->pass); + FREE(auth); + } + HTList_delete(NNTP_AuthInfo); + NNTP_AuthInfo = NULL; + return; +} + +/* + * Initialize the authentication list by loading the user's $HOME/.newsauth + * file. That file is part of tin's configuration and is used by a few other + * programs. + */ +static void load_NNTP_AuthInfo(void) +{ + FILE *fp; + char fname[LY_MAXPATH]; + char buffer[LINE_LENGTH + 1]; + + LYAddPathToHome(fname, sizeof(fname), NEWS_AUTH_FILE); + + if ((fp = fopen(fname, "r")) != 0) { + while (fgets(buffer, (int) sizeof(buffer), fp) != 0) { + char the_host[LINE_LENGTH + 1]; + char the_pass[LINE_LENGTH + 1]; + char the_user[LINE_LENGTH + 1]; + + if (sscanf(buffer, "%s%s%s", the_host, the_pass, the_user) == 3 + && strlen(the_host) != 0 + && strlen(the_pass) != 0 + && strlen(the_user) != 0) { + NNTPAuth *auth = typecalloc(NNTPAuth); + + if (auth == NULL) + break; + StrAllocCopy(auth->host, the_host); + StrAllocCopy(auth->pass, the_pass); + StrAllocCopy(auth->user, the_user); + + HTList_appendObject(NNTP_AuthInfo, auth); + } + } + fclose(fp); + } +} + +const char *HTGetNewsHost(void) +{ + return HTNewsHost; +} + +void HTSetNewsHost(const char *value) +{ + StrAllocCopy(HTNewsHost, value); +} + +/* Initialisation for this module + * ------------------------------ + * + * Except on the NeXT, we pick up the NewsHost name from + * + * 1. Environment variable NNTPSERVER + * 2. File NEWS_SERVER_FILE + * 3. Compilation time macro DEFAULT_NEWS_HOST + * 4. Default to "news" + * + * On the NeXT, we pick up the NewsHost name from, in order: + * + * 1. WorldWideWeb default "NewsHost" + * 2. Global default "NewsHost" + * 3. News default "NewsHost" + * 4. Compilation time macro DEFAULT_NEWS_HOST + * 5. Default to "news" + */ +static BOOL initialized = NO; +static BOOL initialize(void) +{ +#ifdef NeXTStep + char *cp = NULL; +#endif + + /* + * Get name of Host. 
+ */ +#ifdef NeXTStep + if ((cp = NXGetDefaultValue("WorldWideWeb", "NewsHost")) == 0) { + if ((cp = NXGetDefaultValue("News", "NewsHost")) == 0) { + StrAllocCopy(HTNewsHost, DEFAULT_NEWS_HOST); + } + } + if (cp) { + StrAllocCopy(HTNewsHost, cp); + cp = NULL; + } +#else + if (LYGetEnv("NNTPSERVER")) { + StrAllocCopy(HTNewsHost, LYGetEnv("NNTPSERVER")); + CTRACE((tfp, "HTNews: NNTPSERVER defined as `%s'\n", + HTNewsHost)); + } else { + FILE *fp = fopen(NEWS_SERVER_FILE, TXT_R); + + if (fp) { + char server_name[MAXHOSTNAMELEN + 1]; + + if (fgets(server_name, (int) sizeof server_name, fp) != NULL) { + char *p = strchr(server_name, '\n'); + + if (p != NULL) + *p = '\0'; + StrAllocCopy(HTNewsHost, server_name); + CTRACE((tfp, "HTNews: File %s defines news host as `%s'\n", + NEWS_SERVER_FILE, HTNewsHost)); + } + fclose(fp); + } + } + if (!HTNewsHost) + StrAllocCopy(HTNewsHost, DEFAULT_NEWS_HOST); +#endif /* NeXTStep */ + + s = -1; /* Disconnected */ +#ifdef LY_FIND_LEAKS + atexit(free_news_globals); +#endif + return YES; +} + +/* Send NNTP Command line to remote host & Check Response + * ------------------------------------------------------ + * + * On entry, + * command points to the command to be sent, including CRLF, or is null + * pointer if no command to be sent. + * On exit, + * Negative status indicates transmission error, socket closed. + * Positive status is an NNTP status. + */ +static int response(char *command) +{ + int result; + char *p = response_text; + int ich; + + if (command) { + int status; + int length = (int) strlen(command); + + CTRACE((tfp, "NNTP command to be sent: %s", command)); +#ifdef NOT_ASCII + { + const char *p2; + char *q; + char ascii[LINE_LENGTH + 1]; + + for (p2 = command, q = ascii; *p2; p2++, q++) { + *q = TOASCII(*p2); + } + status = NEWS_NETWRITE(s, ascii, length); + } +#else + status = (int) NEWS_NETWRITE(s, (char *) command, length); +#endif /* NOT_ASCII */ + if (status < 0) { + CTRACE((tfp, "HTNews: Unable to send command. 
Disconnecting.\n")); + NEWS_NETCLOSE(s); + s = -1; + return status; + } /* if bad status */ + } + /* if command to be sent */ + for (;;) { + ich = NEXT_CHAR; + if (((*p++ = (char) ich) == LF) || + (p == &response_text[LINE_LENGTH])) { + *--p = '\0'; /* Terminate the string */ + CTRACE((tfp, "NNTP Response: %s\n", response_text)); + sscanf(response_text, "%d", &result); + return result; + } + /* if end of line */ + if (ich == EOF) { + *(p - 1) = '\0'; + if (interrupted_in_htgetcharacter) { + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + } else { + CTRACE((tfp, "HTNews: EOF on read, closing socket %d\n", + s)); + } + NEWS_NETCLOSE(s); /* End of file, close socket */ + s = -1; + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + return (HT_INTERRUPTED); + } + return ((int) EOF); /* End of file on response */ + } + } /* Loop over characters */ +} + +/* Case insensitive string comparisons + * ----------------------------------- + * + * On entry, + * template must be already in upper case. + * unknown may be in upper or lower or mixed case to match. + */ +static BOOL match(const char *unknown, const char *ctemplate) +{ + const char *u = unknown; + const char *t = ctemplate; + + for (; *u && *t && (TOUPPER(*u) == *t); u++, t++) ; /* Find mismatch or end */ + return (BOOL) (*t == 0); /* OK if end of template */ +} + +typedef enum { + NNTPAUTH_ERROR = 0, /* general failure */ + NNTPAUTH_OK = 281, /* authenticated successfully */ + NNTPAUTH_CLOSE = 502 /* server probably closed connection */ +} NNTPAuthResult; + +/* + * This function handles nntp authentication. - FM + */ +static NNTPAuthResult HTHandleAuthInfo(char *host) +{ + HTList *cur = NULL; + NNTPAuth *auth = NULL; + char *UserName = NULL; + char *PassWord = NULL; + char *msg = NULL; + char buffer[512]; + int status, tries; + + /* + * Make sure we have a host. 
- FM + */ + if (isEmpty(host)) + return NNTPAUTH_ERROR; + + /* + * Check for an existing authorization entry. - FM + */ + if (NNTP_AuthInfo == NULL) { + NNTP_AuthInfo = HTList_new(); + load_NNTP_AuthInfo(); +#ifdef LY_FIND_LEAKS + atexit(free_NNTP_AuthInfo); +#endif + } + + cur = NNTP_AuthInfo; + while (NULL != (auth = (NNTPAuth *) HTList_nextObject(cur))) { + if (!strcmp(auth->host, host)) { + UserName = auth->user; + PassWord = auth->pass; + break; + } + } + + /* + * Handle the username. - FM + */ + buffer[sizeof(buffer) - 1] = '\0'; + tries = 3; + + while (tries) { + if (UserName == NULL) { + HTSprintf0(&msg, gettext("Username for news host '%s':"), host); + UserName = HTPrompt(msg, NULL); + FREE(msg); + if (!(UserName && *UserName)) { + FREE(UserName); + return NNTPAUTH_ERROR; + } + } + sprintf(buffer, "AUTHINFO USER %.*s%c%c", + (int) sizeof(buffer) - 17, UserName, CR, LF); + if ((status = response(buffer)) < 0) { + if (status == HT_INTERRUPTED) + _HTProgress(CONNECTION_INTERRUPTED); + else + HTAlert(FAILED_CONNECTION_CLOSED); + if (auth) { + if (auth->user != UserName) { + FREE(auth->user); + auth->user = UserName; + } + } else { + FREE(UserName); + } + return NNTPAUTH_CLOSE; + } + if (status == 281) { + /* + * Username is accepted and no password is required. - FM + */ + if (auth) { + if (auth->user != UserName) { + FREE(auth->user); + auth->user = UserName; + } + } else { + /* + * Store the accepted username and no password. - FM + */ + if ((auth = typecalloc(NNTPAuth)) != NULL) { + StrAllocCopy(auth->host, host); + auth->user = UserName; + HTList_appendObject(NNTP_AuthInfo, auth); + } + } + return NNTPAUTH_OK; + } + if (status != 381) { + /* + * Not success, nor a request for the password, so it must be an + * error. 
- FM + */ + HTAlert(response_text); + tries--; + if ((tries > 0) && HTConfirm(gettext("Change username?"))) { + if (!auth || auth->user != UserName) { + FREE(UserName); + } + if ((UserName = HTPrompt(gettext("Username:"), UserName)) + != NULL && + *UserName) { + continue; + } + } + if (auth) { + if (auth->user != UserName) { + FREE(auth->user); + } + FREE(auth->pass); + } + FREE(UserName); + return NNTPAUTH_ERROR; + } + break; + } + + if (status == 381) { + /* + * Handle the password. - FM + */ + tries = 3; + while (tries) { + if (PassWord == NULL) { + HTSprintf0(&msg, gettext("Password for news host '%s':"), host); + PassWord = HTPromptPassword(msg); + FREE(msg); + if (!(PassWord && *PassWord)) { + FREE(PassWord); + return NNTPAUTH_ERROR; + } + } + sprintf(buffer, "AUTHINFO PASS %.*s%c%c", + (int) sizeof(buffer) - 17, PassWord, CR, LF); + if ((status = response(buffer)) < 0) { + if (status == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + } else { + HTAlert(FAILED_CONNECTION_CLOSED); + } + if (auth) { + if (auth->user != UserName) { + FREE(auth->user); + auth->user = UserName; + } + if (auth->pass != PassWord) { + FREE(auth->pass); + auth->pass = PassWord; + } + } else { + FREE(UserName); + FREE(PassWord); + } + return NNTPAUTH_CLOSE; + } + if (status == 502) { + /* + * That's what INN's nnrpd returns. It closes the connection + * after this. - kw + */ + HTAlert(response_text); + if (auth) { + if (auth->user == UserName) + UserName = NULL; + FREE(auth->user); + if (auth->pass == PassWord) + PassWord = NULL; + FREE(auth->pass); + } + FREE(UserName); + FREE(PassWord); + return NNTPAUTH_CLOSE; + } + if (status == 281) { + /* + * Password also is accepted, and everything has been stored. 
+ * - FM + */ + if (auth) { + if (auth->user != UserName) { + FREE(auth->user); + auth->user = UserName; + } + if (auth->pass != PassWord) { + FREE(auth->pass); + auth->pass = PassWord; + } + } else { + if ((auth = typecalloc(NNTPAuth)) != NULL) { + StrAllocCopy(auth->host, host); + auth->user = UserName; + auth->pass = PassWord; + HTList_appendObject(NNTP_AuthInfo, auth); + } + } + return NNTPAUTH_OK; + } + /* + * Not success, so it must be an error. - FM + */ + HTAlert(response_text); + if (!auth || auth->pass != PassWord) { + FREE(PassWord); + } else { + PassWord = NULL; + } + tries--; + if ((tries > 0) && HTConfirm(gettext("Change password?"))) { + continue; + } + if (auth) { + if (auth->user == UserName) + UserName = NULL; + FREE(auth->user); + FREE(auth->pass); + } + FREE(UserName); + break; + } + } + + return NNTPAUTH_ERROR; +} + +/* Find Author's name in mail address + * ---------------------------------- + * + * On exit, + * Returns allocated string which cannot be freed by the + * calling function, and is reallocated on subsequent calls + * to this function. 
+ * + * For example, returns "Tim Berners-Lee" if given any of + * " Tim Berners-Lee <tim@online.cern.ch> " + * or " tim@online.cern.ch ( Tim Berners-Lee ) " + */ +static char *author_name(char *email) +{ + char *p, *e; + + StrAllocCopy(name, email); + CTRACE((tfp, "Trying to find name in: %s\n", name)); + + if ((p = strrchr(name, '(')) && (e = strrchr(name, ')'))) { + if (e > p) { + *e = '\0'; /* Chop off everything after the ')' */ + return HTStrip(p + 1); /* Remove leading and trailing spaces */ + } + } + + if ((p = strrchr(name, '<')) && (e = strrchr(name, '>'))) { + if (e++ > p) { + while ((*p++ = *e++) != 0) /* Remove <...> */ + ; + return HTStrip(name); /* Remove leading and trailing spaces */ + } + } + + return HTStrip(name); /* Default to the whole thing */ +} + +/* Find Author's mail address + * -------------------------- + * + * On exit, + * Returns allocated string which cannot be freed by the + * calling function, and is reallocated on subsequent calls + * to this function. 
+ * + * For example, returns "montulli@spaced.out.galaxy.net" if given any of + * " Lou Montulli <montulli@spaced.out.galaxy.net> " + * or " montulli@spaced.out.galaxy.net ( Lou "The Stud" Montulli ) " + */ +static char *author_address(char *email) +{ + char *p, *at, *e; + + StrAllocCopy(address, email); + CTRACE((tfp, "Trying to find address in: %s\n", address)); + + if ((p = strrchr(address, '<'))) { + if ((e = strrchr(p, '>')) && (at = strrchr(p, '@'))) { + if (at < e) { + *e = '\0'; /* Remove > */ + return HTStrip(p + 1); /* Remove leading and trailing spaces */ + } + } + } + + if ((p = strrchr(address, '(')) && + (e = strrchr(address, ')')) && (at = strchr(address, '@'))) { + if (e > p && at < e) { + *p = '\0'; /* Chop off everything after the ')' */ + return HTStrip(address); /* Remove leading and trailing spaces */ + } + } + + if ((at = strrchr(address, '@')) && at > address) { + p = (at - 1); + e = (at + 1); + while (p > address && !isspace(UCH(*p))) + p--; + while (*e && !isspace(UCH(*e))) + e++; + *e = 0; + return HTStrip(p); + } + + /* + * Default to the first word. 
+ */ + p = address; + while (isspace(UCH(*p))) + p++; /* find first non-space */ + e = p; + while (!isspace(UCH(*e)) && *e != '\0') + e++; /* find next space or end */ + *e = '\0'; /* terminate space */ + + return (p); +} + +/* Start anchor element + * -------------------- + */ +static void start_anchor(const char *href) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = (BOOL) (i == HTML_A_HREF); + value[HTML_A_HREF] = href; + (*targetClass.start_element) (target, HTML_A, present, value, -1, 0); +} + +/* Start link element + * ------------------ + */ +static void start_link(const char *href, const char *rev) +{ + BOOL present[HTML_LINK_ATTRIBUTES]; + const char *value[HTML_LINK_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_LINK_ATTRIBUTES; i++) + present[i] = (BOOL) (i == HTML_LINK_HREF || i == HTML_LINK_REV); + value[HTML_LINK_HREF] = href; + value[HTML_LINK_REV] = rev; + (*targetClass.start_element) (target, HTML_LINK, present, value, -1, 0); +} + +/* Start list element + * ------------------ + */ +static void start_list(int seqnum) +{ + BOOL present[HTML_OL_ATTRIBUTES]; + const char *value[HTML_OL_ATTRIBUTES]; + char SeqNum[20]; + int i; + + for (i = 0; i < HTML_OL_ATTRIBUTES; i++) + present[i] = (BOOL) (i == HTML_OL_SEQNUM || i == HTML_OL_START); + sprintf(SeqNum, "%d", seqnum); + value[HTML_OL_SEQNUM] = SeqNum; + value[HTML_OL_START] = SeqNum; + (*targetClass.start_element) (target, HTML_OL, present, value, -1, 0); +} + +/* Paste in an Anchor + * ------------------ + * + * + * On entry, + * HT has a selection of zero length at the end. + * text points to the text to be put into the file, 0 terminated. 
+ * addr points to the hypertext reference address, + * terminated by white space, comma, NULL or '>' + */ +static void write_anchor(const char *text, const char *addr) +{ + char href[LINE_LENGTH + 1]; + const char *p; + char *q; + + for (p = addr; *p && (*p != '>') && !WHITE(*p) && (*p != ','); p++) { + ; + } + if (strlen(NewsHREF) + (size_t) (p - addr) + 1 < sizeof(href)) { + q = href; + strcpy(q, NewsHREF); + /* Make complete hypertext reference */ + StrNCat(q, addr, (size_t) (p - addr)); + } else { + q = NULL; + HTSprintf0(&q, "%s%.*s", NewsHREF, (int) (p - addr), addr); + } + + start_anchor(q); + PUTS(text); + END(HTML_A); + + if (q != href) + FREE(q); +} + +/* Write list of anchors + * --------------------- + * + * We take a pointer to a list of objects, and write out each, + * generating an anchor for each. + * + * On entry, + * HT has a selection of zero length at the end. + * text points to a comma or space separated list of addresses. + * On exit, + * *text is NOT any more chopped up into substrings. 
+ */ +static void write_anchors(char *text) +{ + char *start = text; + char *end; + char c; + + for (;;) { + for (; *start && (WHITE(*start)); start++) ; /* Find start */ + if (!*start) + return; /* (Done) */ + for (end = start; + *end && (*end != ' ') && (*end != ','); end++) ; /* Find end */ + if (*end) + end++; /* Include comma or space but not NULL */ + c = *end; + *end = '\0'; + if (*start == '<') + write_anchor(start, start + 1); + else + write_anchor(start, start); + START(HTML_BR); + *end = c; + start = end; /* Point to next one */ + } +} + +/* Abort the connection abort_socket + * -------------------- + */ +static void abort_socket(void) +{ + CTRACE((tfp, "HTNews: EOF on read, closing socket %d\n", s)); + NEWS_NETCLOSE(s); /* End of file, close socket */ + if (rawtext) { + RAW_PUTS("Network Error: connection lost\n"); + } else { + PUTS("Network Error: connection lost"); + PUTC('\n'); + } + s = -1; /* End of file on response */ +} + +/* + * Determine if a line is a valid header line. valid_header + * ------------------------------------------- + */ +static BOOLEAN valid_header(char *line) +{ + char *colon, *space; + + /* + * Blank or tab in first position implies this is a continuation header. + */ + if (line[0] == ' ' || line[0] == '\t') + return (TRUE); + + /* + * Just check for initial letter, colon, and space to make sure we discard + * only invalid headers. + */ + colon = strchr(line, ':'); + space = strchr(line, ' '); + if (isalpha(UCH(line[0])) && colon && space == colon + 1) + return (TRUE); + + /* + * Anything else is a bad header -- it should be ignored. + */ + return (FALSE); +} + +/* post in an Article post_article + * ------------------ + * (added by FM, modeled on Lynx's previous mini inews) + * + * Note the termination condition of a single dot on a line by itself. + * + * On entry, + * s Global socket number is OK + * postfile file with header and article to post. 
+ */ +static void post_article(char *postfile) +{ + char line[512]; + char buf[512]; + char crlf[3]; + char *cp; + int status; + FILE *fd; + int in_header = 1, seen_header = 0, seen_fromline = 0; + int blen = 0, llen = 0; + + /* + * Open the temporary file with the nntp headers and message body. - FM + */ + if ((fd = fopen(NonNull(postfile), TXT_R)) == NULL) { + HTAlert(FAILED_CANNOT_OPEN_POST); + return; + } + + /* + * Read the temporary file and post in maximum 512 byte chunks. - FM + */ + buf[0] = '\0'; + sprintf(crlf, "%c%c", CR, LF); + while (fgets(line, (int) sizeof(line) - 2, fd) != NULL) { + if ((cp = strchr(line, '\n')) != NULL) + *cp = '\0'; + if (line[0] == '.') { + /* + * A single '.' means end of transmission for nntp. Lead dots on + * lines normally are trimmed and the EOF is not registered if the + * dot was not followed by CRLF. We prepend an extra dot for any + * line beginning with one, to retain the one intended, as well as + * avoid a false EOF signal. We know we have room for it in the + * buffer, because we normally send when it would exceed 510. - FM + */ + strcat(buf, "."); + blen++; + } + llen = (int) strlen(line); + if (in_header && !strncasecomp(line, "From:", 5)) { + seen_header = 1; + seen_fromline = 1; + } + if (in_header && line[0] == '\0') { + if (seen_header) { + in_header = 0; + if (!seen_fromline) { + if (blen >= (int) sizeof(buf) - 35) { + IGNORE_RC(NEWS_NETWRITE(s, buf, blen)); + buf[blen = 0] = 0; + } + strcat(buf, "From: anonymous@nowhere.you.know"); + strcat(buf, crlf); + blen += 34; + } + } else { + continue; + } + } else if (in_header) { + if (valid_header(line)) { + seen_header = 1; + } else { + continue; + } + } + strcat(line, crlf); + llen += 2; + if ((blen + llen) >= (int) sizeof(buf) - 1) { + IGNORE_RC(NEWS_NETWRITE(s, buf, blen)); + buf[blen = 0] = 0; + } + strcat(buf, line); + blen += llen; + } + fclose(fd); + HTSYS_remove(postfile); + + /* + * Send the nntp EOF and get the server's response. 
- FM + */ + if (blen >= (int) sizeof(buf) - 4) { + IGNORE_RC(NEWS_NETWRITE(s, buf, blen)); + buf[blen = 0] = 0; + } + strcat(buf, "."); + strcat(buf, crlf); + blen += 3; + IGNORE_RC(NEWS_NETWRITE(s, buf, blen)); + + status = response(NULL); + if (status == 240) { + /* + * Successful post. - FM + */ + HTProgress(response_text); + } else { + /* + * Shucks, something went wrong. - FM + */ + HTAlert(response_text); + } +} + +#ifdef NEWS_DEBUG +/* for DEBUG 1997/11/07 (Fri) 17:20:16 */ +void debug_print(unsigned char *p) +{ + while (*p) { + if (*p == '\0') + break; + if (*p == 0x1b) + printf("[ESC]"); + else if (*p == '\n') + printf("[NL]"); + else if (*p < ' ' || *p >= 0x80) + printf("(%02x)", *p); + else + putchar(*p); + p++; + } + printf("]\n"); +} +#endif + +static char *decode_mime(char **str) +{ + static char empty[] = ""; + +#ifdef SH_EX + if (HTCJK != JAPANESE) + return *str; +#endif + HTmmdecode(str, *str); + return HTrjis(str, *str) ? *str : empty; +} + +/* Read in an Article read_article + * ------------------ + * + * Note the termination condition of a single dot on a line by itself. + * RFC 977 specifies that the line "folding" of RFC850 is not used, so we + * do not handle it here. + * + * On entry, + * s Global socket number is OK + * HT Global hypertext object is ready for appending text + */ +static int read_article(HTParentAnchor *thisanchor) +{ + char line[LINE_LENGTH + 1]; + char *full_line = NULL; + char *subject = NULL; /* Subject string */ + char *from = NULL; /* From string */ + char *replyto = NULL; /* Reply-to string */ + char *date = NULL; /* Date string */ + char *organization = NULL; /* Organization string */ + char *references = NULL; /* Hrefs for other articles */ + char *newsgroups = NULL; /* Newsgroups list */ + char *followupto = NULL; /* Followup list */ + char *href = NULL; + char *p = line; + char *cp; + const char *ccp; + BOOL done = NO; + + /* + * Read in the HEADer of the article. 
+ * + * The header fields are either ignored, or formatted and put into the + * text. + */ + if (!diagnostic && !rawtext) { + while (!done) { + int ich = NEXT_CHAR; + + *p++ = (char) ich; + if (ich == EOF) { + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } + abort_socket(); /* End of file, close socket */ + return (HT_LOADED); /* End of file on response */ + } + if (((char) ich == LF) || (p == &line[LINE_LENGTH])) { + *--p = '\0'; /* Terminate the string */ + CTRACE((tfp, "H %s\n", line)); + + if (line[0] == '\t' || line[0] == ' ') { + int i = 0; + + while (line[i]) { + if (line[i] == '\t') + line[i] = ' '; + i++; + } + if (full_line == NULL) { + StrAllocCopy(full_line, line); + } else { + StrAllocCat(full_line, line); + } + } else { + StrAllocCopy(full_line, line); + } + + if (full_line[0] == '.') { + /* + * End of article? + */ + if (UCH(full_line[1]) < ' ') { + done = YES; + break; + } + } else if (UCH(full_line[0]) < ' ') { + break; /* End of Header? 
*/ + + } else if (match(full_line, "SUBJECT:")) { + StrAllocCopy(subject, HTStrip(strchr(full_line, ':') + 1)); + decode_mime(&subject); + } else if (match(full_line, "DATE:")) { + StrAllocCopy(date, HTStrip(strchr(full_line, ':') + 1)); + + } else if (match(full_line, "ORGANIZATION:")) { + StrAllocCopy(organization, + HTStrip(strchr(full_line, ':') + 1)); + decode_mime(&organization); + + } else if (match(full_line, "FROM:")) { + StrAllocCopy(from, HTStrip(strchr(full_line, ':') + 1)); + decode_mime(&from); + + } else if (match(full_line, "REPLY-TO:")) { + StrAllocCopy(replyto, HTStrip(strchr(full_line, ':') + 1)); + decode_mime(&replyto); + + } else if (match(full_line, "NEWSGROUPS:")) { + StrAllocCopy(newsgroups, HTStrip(strchr(full_line, ':') + 1)); + + } else if (match(full_line, "REFERENCES:")) { + StrAllocCopy(references, HTStrip(strchr(full_line, ':') + 1)); + + } else if (match(full_line, "FOLLOWUP-TO:")) { + StrAllocCopy(followupto, HTStrip(strchr(full_line, ':') + 1)); + + } else if (match(full_line, "MESSAGE-ID:")) { + char *msgid = HTStrip(full_line + 11); + + if (msgid[0] == '<' && msgid[strlen(msgid) - 1] == '>') { + msgid[strlen(msgid) - 1] = '\0'; /* Chop > */ + msgid++; /* Chop < */ + HTAnchor_setMessageID(thisanchor, msgid); + } + + } /* end if match */ + p = line; /* Restart at beginning */ + } /* if end of line */ + } /* Loop over characters */ + FREE(full_line); + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + if (subject && *subject != '\0') + PUTS(subject); + else + PUTS("No Subject"); + END(HTML_TITLE); + PUTC('\n'); + /* + * Put in the owner as a link rel. + */ + if (from || replyto) { + char *temp = NULL; + + StrAllocCopy(temp, author_address(replyto ? 
replyto : from)); + StrAllocCopy(href, STR_MAILTO_URL); + if (strchr(temp, '%') || strchr(temp, '?')) { + cp = HTEscape(temp, URL_XPALPHAS); + StrAllocCat(href, cp); + FREE(cp); + } else { + StrAllocCat(href, temp); + } + start_link(href, "made"); + PUTC('\n'); + FREE(temp); + } + END(HTML_HEAD); + PUTC('\n'); + + START(HTML_H1); + if (subject && *subject != '\0') + PUTS(subject); + else + PUTS("No Subject"); + END(HTML_H1); + PUTC('\n'); + + if (subject) + FREE(subject); + + START(HTML_DLC); + PUTC('\n'); + + if (from || replyto) { + START(HTML_DT); + START(HTML_B); + PUTS("From:"); + END(HTML_B); + PUTC(' '); + if (from) + PUTS(from); + else + PUTS(replyto); + MAYBE_END(HTML_DT); + PUTC('\n'); + + if (!replyto) + StrAllocCopy(replyto, from); + START(HTML_DT); + START(HTML_B); + PUTS("Reply to:"); + END(HTML_B); + PUTC(' '); + start_anchor(href); + if (*replyto != '<') + PUTS(author_name(replyto)); + else + PUTS(author_address(replyto)); + END(HTML_A); + START(HTML_BR); + MAYBE_END(HTML_DT); + PUTC('\n'); + + FREE(from); + FREE(replyto); + } + + if (date) { + START(HTML_DT); + START(HTML_B); + PUTS("Date:"); + END(HTML_B); + PUTC(' '); + PUTS(date); + MAYBE_END(HTML_DT); + PUTC('\n'); + FREE(date); + } + + if (organization) { + START(HTML_DT); + START(HTML_B); + PUTS("Organization:"); + END(HTML_B); + PUTC(' '); + PUTS(organization); + MAYBE_END(HTML_DT); + PUTC('\n'); + FREE(organization); + } + + /* sanitize some headers - kw */ + if (newsgroups && + ((cp = strchr(newsgroups, '/')) || + (cp = strchr(newsgroups, '(')))) { + *cp = '\0'; + } + if (newsgroups && !*newsgroups) { + FREE(newsgroups); + } + if (followupto && + ((cp = strchr(followupto, '/')) || + (cp = strchr(followupto, '(')))) { + *cp = '\0'; + } + if (followupto && !*followupto) { + FREE(followupto); + } + + if (newsgroups && HTCanPost) { + START(HTML_DT); + START(HTML_B); + PUTS("Newsgroups:"); + END(HTML_B); + PUTC('\n'); + MAYBE_END(HTML_DT); + START(HTML_DD); + write_anchors(newsgroups); + 
MAYBE_END(HTML_DD); + PUTC('\n'); + } + + if (followupto && !strcasecomp(followupto, "poster")) { + /* + * "Followup-To: poster" has special meaning. Don't use it to + * construct a newsreply link. -kw + */ + START(HTML_DT); + START(HTML_B); + PUTS("Followup to:"); + END(HTML_B); + PUTC(' '); + if (href) { + start_anchor(href); + PUTS("poster"); + END(HTML_A); + } else { + PUTS("poster"); + } + MAYBE_END(HTML_DT); + PUTC('\n'); + FREE(followupto); + } + + if (newsgroups && HTCanPost) { + /* + * We have permission to POST to this host, so add a link for + * posting followups for this article. - FM + */ + if (!strncasecomp(NewsHREF, STR_SNEWS_URL, 6)) + StrAllocCopy(href, "snewsreply://"); + else + StrAllocCopy(href, "newsreply://"); + StrAllocCat(href, NewsHost); + StrAllocCat(href, "/"); + StrAllocCat(href, (followupto ? followupto : newsgroups)); + if (*href == 'n' && + (ccp = HTAnchor_messageID(thisanchor)) && *ccp) { + StrAllocCat(href, ";ref="); + if (strchr(ccp, '<') || strchr(ccp, '&') || + strchr(ccp, ' ') || strchr(ccp, ':') || + strchr(ccp, '/') || strchr(ccp, '%') || + strchr(ccp, ';')) { + char *cp1 = HTEscape(ccp, URL_XPALPHAS); + + StrAllocCat(href, cp1); + FREE(cp1); + } else { + StrAllocCat(href, ccp); + } + } + + START(HTML_DT); + START(HTML_B); + PUTS("Followup to:"); + END(HTML_B); + PUTC(' '); + start_anchor(href); + if (strchr((followupto ? followupto : newsgroups), ',')) { + PUTS("newsgroups"); + } else { + PUTS("newsgroup"); + } + END(HTML_A); + MAYBE_END(HTML_DT); + PUTC('\n'); + } + FREE(newsgroups); + FREE(followupto); + + if (references) { + START(HTML_DT); + START(HTML_B); + PUTS("References:"); + END(HTML_B); + MAYBE_END(HTML_DT); + PUTC('\n'); + START(HTML_DD); + write_anchors(references); + MAYBE_END(HTML_DD); + PUTC('\n'); + FREE(references); + } + + END(HTML_DLC); + PUTC('\n'); + FREE(href); + } + + if (rawtext) { + /* + * No tags, and never do a PUTC. 
- kw + */ + ; + } else if (diagnostic) { + /* + * Read in the HEAD and BODY of the Article as XMP formatted text. - + * FM + */ + START(HTML_XMP); + PUTC('\n'); + } else { + /* + * Read in the BODY of the Article as PRE formatted text. - FM + */ + START(HTML_PRE); + PUTC('\n'); + } + + p = line; + while (!done) { + int ich = NEXT_CHAR; + + *p++ = (char) ich; + if (ich == EOF) { + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } + abort_socket(); /* End of file, close socket */ + return (HT_LOADED); /* End of file on response */ + } + if (((char) ich == LF) || (p == &line[LINE_LENGTH])) { + *p = '\0'; /* Terminate the string */ + CTRACE((tfp, "B %s", line)); +#ifdef NEWS_DEBUG /* 1997/11/09 (Sun) 15:56:11 */ + debug_print(line); /* @@@ */ +#endif + if (line[0] == '.') { + /* + * End of article? + */ + if (UCH(line[1]) < ' ') { + break; + } else { /* Line starts with dot */ + if (rawtext) { + RAW_PUTS(&line[1]); + } else { + PUTS(&line[1]); /* Ignore first dot */ + } + } + } else { + if (rawtext) { + RAW_PUTS(line); + } else if (diagnostic || !scan_for_buried_news_references) { + /* + * All lines are passed as unmodified source. - FM + */ + PUTS(line); + } else { + /* + * Normal lines are scanned for buried references to other + * articles. Unfortunately, it could pick up mail + * addresses as well! It also can corrupt uuencoded + * messages! So we don't do this when fetching articles as + * WWW_SOURCE or when downloading (diagnostic is TRUE) or + * if the client has set scan_for_buried_news_references to + * FALSE. Otherwise, we convert all "<...@...>" strings + * preceded by "rticle " to "news:...@..." links, and any + * strings that look like URLs to links. 
- FM + */ + char *l = line; + char *p2; + + while ((p2 = strstr(l, "rticle <")) != NULL) { + char *q = strrchr(p2, '>'); + char *at = strrchr(p2, '@'); + + if (q && at && at < q) { + char c = q[1]; + + q[1] = 0; /* chop up */ + p2 += 7; + *p2 = 0; + while (*l) { + if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) && + StrNCmp(l, "snews://", 8) && + StrNCmp(l, "nntp://", 7) && + StrNCmp(l, "snewspost:", 10) && + StrNCmp(l, "snewsreply:", 11) && + StrNCmp(l, "newspost:", 9) && + StrNCmp(l, "newsreply:", 10) && + StrNCmp(l, "ftp://", 6) && + StrNCmp(l, "file:/", 6) && + StrNCmp(l, "finger://", 9) && + StrNCmp(l, "http://", 7) && + StrNCmp(l, "https://", 8) && + StrNCmp(l, "wais://", 7) && + StrNCmp(l, STR_MAILTO_URL, LEN_MAILTO_URL) && + StrNCmp(l, "cso://", 6) && + StrNCmp(l, "gopher://", 9)) { + PUTC(*l++); + } else { + StrAllocCopy(href, l); + start_anchor(strtok(href, " \r\n\t,>)\"")); + while (*l && !strchr(" \r\n\t,>)\"", *l)) + PUTC(*l++); + END(HTML_A); + FREE(href); + } + } + *p2 = '<'; /* again */ + *q = 0; + start_anchor(p2 + 1); + *q = '>'; /* again */ + PUTS(p2); + END(HTML_A); + q[1] = c; /* again */ + l = q + 1; + } else { + break; /* line has unmatched <> */ + } + } + while (*l) { /* Last bit of the line */ + if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) && + StrNCmp(l, "snews://", 8) && + StrNCmp(l, "nntp://", 7) && + StrNCmp(l, "snewspost:", 10) && + StrNCmp(l, "snewsreply:", 11) && + StrNCmp(l, "newspost:", 9) && + StrNCmp(l, "newsreply:", 10) && + StrNCmp(l, "ftp://", 6) && + StrNCmp(l, "file:/", 6) && + StrNCmp(l, "finger://", 9) && + StrNCmp(l, "http://", 7) && + StrNCmp(l, "https://", 8) && + StrNCmp(l, "wais://", 7) && + StrNCmp(l, STR_MAILTO_URL, LEN_MAILTO_URL) && + StrNCmp(l, "cso://", 6) && + StrNCmp(l, "gopher://", 9)) + PUTC(*l++); + else { + StrAllocCopy(href, l); + start_anchor(strtok(href, " \r\n\t,>)\"")); + while (*l && !strchr(" \r\n\t,>)\"", *l)) + PUTC(*l++); + END(HTML_A); + FREE(href); + } + } + } /* if diagnostic or not 
scan_for_buried_news_references */ + } /* if not dot */ + p = line; /* Restart at beginning */ + } /* if end of line */ + } /* Loop over characters */ + + if (rawtext) + return (HT_LOADED); + + if (diagnostic) + END(HTML_XMP); + else + END(HTML_PRE); + PUTC('\n'); + return (HT_LOADED); +} + +/* Read in a List of Newsgroups + * ---------------------------- + * + * Note the termination condition of a single dot on a line by itself. + * RFC 977 specifies that the line "folding" of RFC850 is not used, + * so we do not handle it here. + */ +static int read_list(char *arg) +{ + char line[LINE_LENGTH + 1]; + char *p; + BOOL done = NO; + BOOL head = NO; + BOOL tail = NO; + BOOL skip_this_line = NO; + BOOL skip_rest_of_line = NO; + int listing = 0; + char *pattern = NULL; + int len = 0; + + /* + * Support head or tail matches for groups to list. - FM + */ + if (arg && strlen(arg) > 1) { + if (*arg == '*') { + tail = YES; + StrAllocCopy(pattern, (arg + 1)); + } else if (arg[strlen(arg) - 1] == '*') { + head = YES; + StrAllocCopy(pattern, arg); + pattern[strlen(pattern) - 1] = '\0'; + } + if (tail || head) { + len = (int) strlen(pattern); + } + + } + + /* + * Read the server's reply. + * + * The lines are scanned for newsgroup names and descriptions. 
+ */ + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + PUTS("Newsgroups"); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_H1); + PUTS("Newsgroups"); + END(HTML_H1); + PUTC('\n'); + p = line; + START(HTML_DLC); + PUTC('\n'); + while (!done) { + int ich = NEXT_CHAR; + char ch = (char) ich; + + if (ich == EOF) { + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } + abort_socket(); /* End of file, close socket */ + FREE(pattern); + return (HT_LOADED); /* End of file on response */ + } else if (skip_this_line) { + if (ch == LF) { + skip_this_line = skip_rest_of_line = NO; + p = line; + } + continue; + } else if (skip_rest_of_line) { + if (ch != LF) { + continue; + } + } else if (p == &line[LINE_LENGTH]) { + CTRACE((tfp, "b %.*s%c[...]\n", (LINE_LENGTH), line, ch)); + *p = '\0'; + if (ch == LF) { + ; /* Will be dealt with below */ + } else if (WHITE(ch)) { + ch = LF; /* May treat as line without description */ + skip_this_line = YES; /* ...and ignore until LF */ + } else if (strchr(line, ' ') == NULL && + strchr(line, '\t') == NULL) { + /* No separator found */ + CTRACE((tfp, "HTNews..... group name too long, discarding.\n")); + skip_this_line = YES; /* ignore whole line */ + continue; + } else { + skip_rest_of_line = YES; /* skip until ch == LF found */ + } + } else { + *p++ = ch; + } + if (ch == LF) { + skip_rest_of_line = NO; /* done, reset flag */ + *p = '\0'; /* Terminate the string */ + CTRACE((tfp, "B %s", line)); + if (line[0] == '.') { + /* + * End of article? + */ + if (UCH(line[1]) < ' ') { + break; + } else { /* Line starts with dot */ + START(HTML_DT); + PUTS(&line[1]); + MAYBE_END(HTML_DT); + } + } else if (line[0] == '#') { /* Comment? 
*/ + p = line; /* Restart at beginning */ + continue; + } else { + /* + * Normal lines are scanned for references to newsgroups. + */ + int i = 0; + + /* find whitespace if it exits */ + for (; line[i] != '\0' && !WHITE(line[i]); i++) ; /* null body */ + + if (line[i] != '\0') { + line[i] = '\0'; + if ((head && strncasecomp(line, pattern, len)) || + (tail && (i < len || + strcasecomp((line + (i - len)), pattern)))) { + p = line; /* Restart at beginning */ + continue; + } + START(HTML_DT); + write_anchor(line, line); + listing++; + MAYBE_END(HTML_DT); + PUTC('\n'); + START(HTML_DD); + PUTS(&line[i + 1]); /* put description */ + MAYBE_END(HTML_DD); + } else { + if ((head && strncasecomp(line, pattern, len)) || + (tail && (i < len || + strcasecomp((line + (i - len)), pattern)))) { + p = line; /* Restart at beginning */ + continue; + } + START(HTML_DT); + write_anchor(line, line); + MAYBE_END(HTML_DT); + listing++; + } + } /* if not dot */ + p = line; /* Restart at beginning */ + } /* if end of line */ + } /* Loop over characters */ + if (!listing) { + char *msg = NULL; + + START(HTML_DT); + HTSprintf0(&msg, gettext("No matches for: %s"), arg); + PUTS(msg); + MAYBE_END(HTML_DT); + FREE(msg); + } + END(HTML_DLC); + PUTC('\n'); + FREE(pattern); + return (HT_LOADED); +} + +/* Read in a Newsgroup + * ------------------- + * + * Unfortunately, we have to ask for each article one by one if we + * want more than one field. 
+ * + */ +static int read_group(const char *groupName, + int first_required, + int last_required) +{ + char line[LINE_LENGTH + 1]; + char *author = NULL; + char *subject = NULL; + char *date = NULL; + int i; + char *p; + BOOL done; + + char buffer[LINE_LENGTH + 1]; + char *temp = NULL; + char *reference = NULL; /* Href for article */ + int art; /* Article number WITHIN GROUP */ + int status, count, first, last; /* Response fields */ + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + PUTS("Newsgroup "); + PUTS(groupName); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + + sscanf(response_text, " %d %d %d %d", &status, &count, &first, &last); + CTRACE((tfp, "Newsgroup status=%d, count=%d, (%d-%d) required:(%d-%d)\n", + status, count, first, last, first_required, last_required)); + if (last == 0) { + PUTS(gettext("\nNo articles in this group.\n")); + goto add_post; + } +#define FAST_THRESHOLD 100 /* Above this, read IDs fast */ +#define CHOP_THRESHOLD 50 /* Above this, chop off the rest */ + + if (first_required < first) + first_required = first; /* clip */ + if ((last_required == 0) || (last_required > last)) + last_required = last; + + if (last_required < first_required) { + PUTS(gettext("\nNo articles in this range.\n")); + goto add_post; + } + + if (last_required - first_required + 1 > HTNewsMaxChunk) { /* Trim this block */ + first_required = last_required - HTNewsChunkSize + 1; + } + CTRACE((tfp, " Chunk will be (%d-%d)\n", + first_required, last_required)); + + /* + * Set window title. + */ + HTSprintf0(&temp, gettext("%s, Articles %d-%d"), + groupName, first_required, last_required); + START(HTML_H1); + PUTS(temp); + FREE(temp); + END(HTML_H1); + PUTC('\n'); + + /* + * Link to earlier articles. 
+ */ + if (first_required > first) { + int before; /* Start of one before */ + + if (first_required - HTNewsMaxChunk <= first) + before = first; + else + before = first_required - HTNewsChunkSize; + HTSprintf0(&dbuf, "%s%s/%d-%d", NewsHREF, groupName, + before, first_required - 1); + CTRACE((tfp, " Block before is %s\n", dbuf)); + PUTC('('); + start_anchor(dbuf); + PUTS(gettext("Earlier articles")); + END(HTML_A); + PUTS("...)\n"); + START(HTML_P); + PUTC('\n'); + } + + done = NO; + +/*#define USE_XHDR*/ +#ifdef USE_XHDR + if (count > FAST_THRESHOLD) { + HTSprintf0(&temp, + gettext("\nThere are about %d articles currently available in %s, IDs as follows:\n\n"), + count, groupName); + PUTS(temp); + FREE(temp); + sprintf(buffer, "XHDR Message-ID %d-%d%c%c", first, last, CR, LF); + status = response(buffer); + if (status == 221) { + p = line; + while (!done) { + int ich = NEXT_CHAR; + + *p++ = ich; + if (ich == EOF) { + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } + abort_socket(); /* End of file, close socket */ + return (HT_LOADED); /* End of file on response */ + } + if (((char) ich == '\n') || (p == &line[LINE_LENGTH])) { + *p = '\0'; /* Terminate the string */ + CTRACE((tfp, "X %s", line)); + if (line[0] == '.') { + /* + * End of article? + */ + if (UCH(line[1]) < ' ') { + done = YES; + break; + } else { /* Line starts with dot */ + /* Ignore strange line */ + } + } else { + /* + * Normal lines are scanned for references to articles. + */ + char *space = strchr(line, ' '); + + if (space++) + write_anchor(space, space); + } /* if not dot */ + p = line; /* Restart at beginning */ + } /* if end of line */ + } /* Loop over characters */ + + /* leaving loop with "done" set */ + } /* Good status */ + } +#endif /* USE_XHDR */ + + /* + * Read newsgroup using individual fields. 
+ */ + if (!done) { + START(HTML_B); + if (first == first_required && last == last_required) + PUTS(gettext("All available articles in ")); + else + PUTS("Articles in "); + PUTS(groupName); + END(HTML_B); + PUTC('\n'); + if (LYListNewsNumbers) + start_list(first_required); + else + START(HTML_UL); + for (art = first_required; art <= last_required; art++) { +/*#define OVERLAP*/ +#ifdef OVERLAP + /* + * With this code we try to keep the server running flat out by + * queuing just one extra command ahead of time. We assume (1) + * that the server won't abort if it gets input during output, and + * (2) that TCP buffering is enough for the two commands. Both + * these assumptions seem very reasonable. However, we HAVE had a + * hangup with a loaded server. + */ + if (art == first_required) { + if (art == last_required) { /* Only one */ + sprintf(buffer, "HEAD %d%c%c", + art, CR, LF); + status = response(buffer); + } else { /* First of many */ + sprintf(buffer, "HEAD %d%c%cHEAD %d%c%c", + art, CR, LF, art + 1, CR, LF); + status = response(buffer); + } + } else if (art == last_required) { /* Last of many */ + status = response(NULL); + } else { /* Middle of many */ + sprintf(buffer, "HEAD %d%c%c", art + 1, CR, LF); + status = response(buffer); + } +#else /* Not OVERLAP: */ + sprintf(buffer, "HEAD %d%c%c", art, CR, LF); + status = response(buffer); +#endif /* OVERLAP */ + /* + * Check for a good response (221) for the HEAD request, and if so, + * parse it. Otherwise, indicate the error so that the number of + * listings corresponds to what's claimed for the range, and if we + * are listing numbers via an ordered list, they stay in synchrony + * with the article numbers. 
- FM + */ + if (status == 221) { /* Head follows - parse it: */ + p = line; /* Write pointer */ + done = NO; + while (!done) { + int ich = NEXT_CHAR; + + *p++ = (char) ich; + if (ich == EOF) { + if (interrupted_in_htgetcharacter) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } + abort_socket(); /* End of file, close socket */ + return (HT_LOADED); /* End of file on response */ + } + if (((char) ich == LF) || + (p == &line[LINE_LENGTH])) { + + *--p = '\0'; /* Terminate & chop LF */ + p = line; /* Restart at beginning */ + CTRACE((tfp, "G %s\n", line)); + switch (line[0]) { + + case '.': + /* + * End of article? + */ + done = (BOOL) (UCH(line[1]) < ' '); + break; + + case 'S': + case 's': + if (match(line, "SUBJECT:")) { + StrAllocCopy(subject, line + 9); + decode_mime(&subject); + } + break; + + case 'M': + case 'm': + if (match(line, "MESSAGE-ID:")) { + char *addr = HTStrip(line + 11) + 1; /* Chop < */ + + addr[strlen(addr) - 1] = '\0'; /* Chop > */ + StrAllocCopy(reference, addr); + } + break; + + case 'f': + case 'F': + if (match(line, "FROM:")) { + char *p2; + + StrAllocCopy(author, strchr(line, ':') + 1); + decode_mime(&author); + p2 = author + strlen(author) - 1; + if (*p2 == LF) + *p2 = '\0'; /* Chop off newline */ + } + break; + + case 'd': + case 'D': + if (LYListNewsDates && match(line, "DATE:")) { + StrAllocCopy(date, + HTStrip(strchr(line, ':') + 1)); + } + break; + + } /* end switch on first character */ + } /* if end of line */ + } /* Loop over characters */ + + PUTC('\n'); + START(HTML_LI); + p = decode_mime(&subject); + HTSprintf0(&temp, "\"%s\"", NonNull(p)); + if (reference) { + write_anchor(temp, reference); + FREE(reference); + } else { + PUTS(temp); + } + FREE(temp); + + if (author != NULL) { + PUTS(" - "); + if (LYListNewsDates) + START(HTML_I); + PUTS(decode_mime(&author)); + if (LYListNewsDates) + END(HTML_I); + 
FREE(author); + } + if (date) { + if (!diagnostic) { + for (i = 0; date[i]; i++) { + if (date[i] == ' ') { + date[i] = HT_NON_BREAK_SPACE; + } + } + } + sprintf(buffer, " [%.*s]", (int) (sizeof(buffer) - 4), date); + PUTS(buffer); + FREE(date); + } + MAYBE_END(HTML_LI); + /* + * Indicate progress! @@@@@@ + */ + } else if (status == HT_INTERRUPTED) { + interrupted_in_htgetcharacter = 0; + CTRACE((tfp, + "HTNews: Interrupted on read, closing socket %d\n", + s)); + NEWS_NETCLOSE(s); + s = -1; + return (HT_INTERRUPTED); + } else { + /* + * Use the response text on error. - FM + */ + PUTC('\n'); + START(HTML_LI); + START(HTML_I); + if (LYListNewsNumbers) + LYStrNCpy(buffer, "Status:", sizeof(buffer) - 1); + else + sprintf(buffer, "Status (ARTICLE %d):", art); + PUTS(buffer); + END(HTML_I); + PUTC(' '); + PUTS(response_text); + MAYBE_END(HTML_LI); + } /* Handle response to HEAD request */ + } /* Loop over article */ + FREE(author); + FREE(subject); + } /* If read headers */ + PUTC('\n'); + if (LYListNewsNumbers) + END(HTML_OL); + else + END(HTML_UL); + PUTC('\n'); + + /* + * Link to later articles. + */ + if (last_required < last) { + int after; /* End of article after */ + + after = last_required + HTNewsChunkSize; + if (after == last) + HTSprintf0(&dbuf, "%s%s", NewsHREF, groupName); /* original group */ + else + HTSprintf0(&dbuf, "%s%s/%d-%d", NewsHREF, groupName, + last_required + 1, after); + CTRACE((tfp, " Block after is %s\n", dbuf)); + PUTC('('); + start_anchor(dbuf); + PUTS(gettext("Later articles")); + END(HTML_A); + PUTS("...)\n"); + } + + add_post: + if (HTCanPost) { + /* + * We have permission to POST to this host, so add a link for posting + * messages to this newsgroup. 
- FM + */ + char *href = NULL; + + START(HTML_HR); + PUTC('\n'); + if (!strncasecomp(NewsHREF, STR_SNEWS_URL, 6)) + StrAllocCopy(href, "snewspost://"); + else + StrAllocCopy(href, "newspost://"); + StrAllocCat(href, NewsHost); + StrAllocCat(href, "/"); + StrAllocCat(href, groupName); + start_anchor(href); + PUTS(gettext("Post to ")); + PUTS(groupName); + END(HTML_A); + FREE(href); + } else { + START(HTML_HR); + } + PUTC('\n'); + return (HT_LOADED); +} + +/* Load by name. HTLoadNews + * ============= + */ +static int HTLoadNews(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *stream) +{ + char command[262]; /* The whole command */ + char proxycmd[260]; /* The proxy command */ + char groupName[GROUP_NAME_LENGTH]; /* Just the group name */ + int status; /* tcp return */ + int retries; /* A count of how hard we have tried */ + BOOL normal_url; /* Flag: "news:" or "nntp:" (physical) URL */ + BOOL group_wanted; /* Flag: group was asked for, not article */ + BOOL list_wanted; /* Flag: list was asked for, not article */ + BOOL post_wanted; /* Flag: new post to group was asked for */ + BOOL reply_wanted; /* Flag: followup post was asked for */ + BOOL spost_wanted; /* Flag: new SSL post to group was asked for */ + BOOL sreply_wanted; /* Flag: followup SSL post was asked for */ + BOOL head_wanted = NO; /* Flag: want HEAD of single article */ + int first, last; /* First and last articles asked for */ + char *cp = 0; + char *ListArg = NULL; + char *ProxyHost = NULL; + char *ProxyHREF = NULL; + char *postfile = NULL; + +#ifdef USE_SSL + char SSLprogress[256]; +#endif /* USE_SSL */ + + diagnostic = (format_out == WWW_SOURCE || /* set global flag */ + format_out == HTAtom_for("www/download") || + format_out == HTAtom_for("www/dump")); + rawtext = NO; + + CTRACE((tfp, "HTNews: Looking for %s\n", arg)); + + if (!initialized) + initialized = initialize(); + if (!initialized) + return -1; /* FAIL */ + + FREE(NewsHREF); + command[0] = '\0'; + 
command[sizeof(command) - 1] = '\0'; + proxycmd[0] = '\0'; + proxycmd[sizeof(proxycmd) - 1] = '\0'; + + { + const char *p1; + + /* + * We will ask for the document, omitting the host name & anchor. + * + * Syntax of address is + * xxx@yyy Article + * <xxx@yyy> Same article + * xxxxx News group (no "@") + * group/n1-n2 Articles n1 to n2 in group + */ + normal_url = (BOOL) (!StrNCmp(arg, STR_NEWS_URL, LEN_NEWS_URL) || + !StrNCmp(arg, "nntp:", 5)); + spost_wanted = (BOOL) (!normal_url && strstr(arg, "snewspost:") != NULL); + sreply_wanted = (BOOL) (!(normal_url || spost_wanted) && + strstr(arg, "snewsreply:") != NULL); + post_wanted = (BOOL) (!(normal_url || spost_wanted || sreply_wanted) && + strstr(arg, "newspost:") != NULL); + reply_wanted = (BOOL) (!(normal_url || spost_wanted || sreply_wanted || + post_wanted) && + strstr(arg, "newsreply:") != NULL); + group_wanted = (BOOL) ((!(spost_wanted || sreply_wanted || + post_wanted || reply_wanted) && + strchr(arg, '@') == NULL) && + (strchr(arg, '*') == NULL)); + list_wanted = (BOOL) ((!(spost_wanted || sreply_wanted || + post_wanted || reply_wanted || + group_wanted) && + strchr(arg, '@') == NULL) && + (strchr(arg, '*') != NULL)); + +#ifndef USE_SSL + if (!strncasecomp(arg, "snewspost:", 10) || + !strncasecomp(arg, "snewsreply:", 11)) { + HTAlert(FAILED_CANNOT_POST_SSL); + return HT_NOT_LOADED; + } +#endif /* !USE_SSL */ + if (post_wanted || reply_wanted || spost_wanted || sreply_wanted) { + /* + * Make sure we have a non-zero path for the newsgroup(s). 
- FM + */ + if ((p1 = strrchr(arg, '/')) != NULL) { + p1++; + } else if ((p1 = strrchr(arg, ':')) != NULL) { + p1++; + } + if (!(p1 && *p1)) { + HTAlert(WWW_ILLEGAL_URL_MESSAGE); + return (HT_NO_DATA); + } + if (!(cp = HTParse(arg, "", PARSE_HOST)) || *cp == '\0') { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, HTNewsHost)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, HTNewsHost); + } else { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, cp)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, cp); + } + FREE(cp); + HTSprintf0(&NewsHREF, "%s://%.*s/", + (post_wanted ? + "newspost" : + (reply_wanted ? + "newreply" : + (spost_wanted ? + "snewspost" : "snewsreply"))), + (int) sizeof(command) - 15, NewsHost); + + /* + * If the SSL daemon is being used as a proxy, reset p1 to the + * start of the proxied URL rather than to the start of the + * newsgroup(s). - FM + */ + if (spost_wanted && strncasecomp(arg, "snewspost:", 10)) + p1 = strstr(arg, "snewspost:"); + if (sreply_wanted && strncasecomp(arg, "snewsreply:", 11)) + p1 = strstr(arg, "snewsreply:"); + + /* p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); */ + /* + * Don't use HTParse because news: access doesn't follow + * traditional rules. 
For instance, if the article reference + * contains a '#', the rest of it is lost -- JFG 10/7/92, from a + * bug report + */ + } else if (isNNTP_URL(arg)) { + if (((*(arg + 5) == '\0') || + (!strcmp((arg + 5), "/") || + !strcmp((arg + 5), "//") || + !strcmp((arg + 5), "///"))) || + ((!StrNCmp((arg + 5), "//", 2)) && + (!(cp = strchr((arg + 7), '/')) || *(cp + 1) == '\0'))) { + p1 = "*"; + group_wanted = FALSE; + list_wanted = TRUE; + } else if (*(arg + 5) != '/') { + p1 = (arg + 5); + } else if (*(arg + 5) == '/' && *(arg + 6) != '/') { + p1 = (arg + 6); + } else { + p1 = (cp + 1); + } + if (!(cp = HTParse(arg, "", PARSE_HOST)) || *cp == '\0') { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, HTNewsHost)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, HTNewsHost); + } else { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, cp)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, cp); + } + FREE(cp); + SnipIn2(command, "%s//%.*s/", STR_NNTP_URL, 9, NewsHost); + StrAllocCopy(NewsHREF, command); + } else if (!strncasecomp(arg, STR_SNEWS_URL, 6)) { +#ifdef USE_SSL + if (((*(arg + 6) == '\0') || + (!strcmp((arg + 6), "/") || + !strcmp((arg + 6), "//") || + !strcmp((arg + 6), "///"))) || + ((!StrNCmp((arg + 6), "//", 2)) && + (!(cp = strchr((arg + 8), '/')) || *(cp + 1) == '\0'))) { + p1 = "*"; + group_wanted = FALSE; + list_wanted = TRUE; + } else if (*(arg + 6) != '/') { + p1 = (arg + 6); + } else if (*(arg + 6) == '/' && *(arg + 7) != '/') { + p1 = (arg + 7); + } else { + p1 = (cp + 1); + } + if (!(cp = HTParse(arg, "", PARSE_HOST)) || *cp == '\0') { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, HTNewsHost)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, HTNewsHost); + } else { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, cp)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, cp); + } + FREE(cp); + sprintf(command, "%s//%.250s/", STR_SNEWS_URL, NewsHost); + StrAllocCopy(NewsHREF, command); +#else + 
HTAlert(gettext("This client does not contain support for SNEWS URLs.")); + return HT_NOT_LOADED; +#endif /* USE_SSL */ + } else if (!strncasecomp(arg, "news:/", 6)) { + if (((*(arg + 6) == '\0') || + !strcmp((arg + 6), "/") || + !strcmp((arg + 6), "//")) || + ((*(arg + 6) == '/') && + (!(cp = strchr((arg + 7), '/')) || *(cp + 1) == '\0'))) { + p1 = "*"; + group_wanted = FALSE; + list_wanted = TRUE; + } else if (*(arg + 6) != '/') { + p1 = (arg + 6); + } else { + p1 = (cp + 1); + } + if (!(cp = HTParse(arg, "", PARSE_HOST)) || *cp == '\0') { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, HTNewsHost)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, HTNewsHost); + } else { + if (s >= 0 && NewsHost && strcasecomp(NewsHost, cp)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, cp); + } + FREE(cp); + SnipIn(command, "news://%.*s/", 9, NewsHost); + StrAllocCopy(NewsHREF, command); + } else { + p1 = (arg + 5); /* Skip "news:" prefix */ + if (*p1 == '\0') { + p1 = "*"; + group_wanted = FALSE; + list_wanted = TRUE; + } + if (s >= 0 && NewsHost && strcasecomp(NewsHost, HTNewsHost)) { + NEWS_NETCLOSE(s); + s = -1; + } + StrAllocCopy(NewsHost, HTNewsHost); + StrAllocCopy(NewsHREF, STR_NEWS_URL); + } + + /* + * Set up any proxy for snews URLs that returns NNTP responses for Lynx + * to convert to HTML, instead of doing the conversion itself, and for + * handling posts or followups. 
- TZ & FM + */ + if (!strncasecomp(p1, STR_SNEWS_URL, 6) || + !strncasecomp(p1, "snewspost:", 10) || + !strncasecomp(p1, "snewsreply:", 11)) { + StrAllocCopy(ProxyHost, NewsHost); + if ((cp = HTParse(p1, "", PARSE_HOST)) != NULL && *cp != '\0') { + SnipIn2(command, "%s//%.*s", STR_SNEWS_URL, 10, cp); + StrAllocCopy(NewsHost, cp); + } else { + SnipIn2(command, "%s//%.*s", STR_SNEWS_URL, 10, NewsHost); + } + command[sizeof(command) - 2] = '\0'; + FREE(cp); + sprintf(proxycmd, "GET %.*s%c%c%c%c", + (int) sizeof(proxycmd) - 9, command, + CR, LF, CR, LF); + CTRACE((tfp, "HTNews: Proxy command is '%.*s'\n", + (int) (strlen(proxycmd) - 4), proxycmd)); + strcat(command, "/"); + StrAllocCopy(ProxyHREF, NewsHREF); + StrAllocCopy(NewsHREF, command); + if (spost_wanted || sreply_wanted) { + /* + * Reset p1 so that it points to the newsgroup(s). + */ + if ((p1 = strrchr(arg, '/')) != NULL) { + p1++; + } else { + p1 = (strrchr(arg, ':') + 1); + } + } else { + /* + * Reset p1 so that it points to the newsgroup (or a wildcard), + * or the article. + */ + if (!(cp = strrchr((p1 + 6), '/')) || *(cp + 1) == '\0') { + p1 = "*"; + group_wanted = FALSE; + list_wanted = TRUE; + } else { + p1 = (cp + 1); + } + } + } + + /* + * Set up command for a post, listing, or article request. 
- FM + */ + if (post_wanted || reply_wanted || spost_wanted || sreply_wanted) { + strcpy(command, "POST"); + } else if (list_wanted) { + if (strlen(p1) > 249) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + SnipIn(command, "XGTITLE %.*s", 11, p1); + } else if (group_wanted) { + char *slash = strchr(p1, '/'); + + first = 0; + last = 0; + if (slash) { + *slash = '\0'; + if (strlen(p1) >= sizeof(groupName)) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + LYStrNCpy(groupName, p1, sizeof(groupName) - 1); + *slash = '/'; + (void) sscanf(slash + 1, "%d-%d", &first, &last); + if ((first > 0) && (isdigit(UCH(*(slash + 1)))) && + (strchr(slash + 1, '-') == NULL || first == last)) { + /* + * We got a number greater than 0, which will be loaded as + * first, and either no range or the range computes to + * zero, so make last negative, as a flag to select the + * group and then fetch an article by number (first) + * instead of by messageID. - FM + */ + last = -1; + } + } else { + if (strlen(p1) >= sizeof(groupName)) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + LYStrNCpy(groupName, p1, sizeof(groupName) - 1); + } + SnipIn(command, "GROUP %.*s", 9, groupName); + } else { + size_t add_open = (size_t) (strchr(p1, '<') == 0); + size_t add_close = (size_t) (strchr(p1, '>') == 0); + + if (strlen(p1) + add_open + add_close >= 252) { + FREE(ProxyHost); + FREE(ProxyHREF); + HTAlert(URL_TOO_LONG); + return -400; + } + sprintf(command, "ARTICLE %s%.*s%s", + add_open ? "<" : "", + (int) (sizeof(command) - (11 + add_open + add_close)), + p1, + add_close ? ">" : ""); + } + + { + char *p = command + strlen(command); + + /* + * Terminate command with CRLF, as in RFC 977. 
+ */ + *p++ = CR; /* Macros to be correct on Mac */ + *p++ = LF; + *p = 0; + } + StrAllocCopy(ListArg, p1); + } /* scope of p1 */ + + if (!*arg) { + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + return NO; /* Ignore if no name */ + } + + if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted || + (group_wanted && last != -1) || list_wanted)) { + head_wanted = anAnchor->isHEAD; + if (head_wanted && !StrNCmp(command, "ARTICLE ", 8)) { + /* overwrite "ARTICLE" - hack... */ + strcpy(command, "HEAD "); + for (cp = command + 5;; cp++) + if ((*cp = *(cp + 3)) == '\0') + break; + } + rawtext = (BOOL) (head_wanted || keep_mime_headers); + } + if (rawtext) { + rawtarget = HTStreamStack(WWW_PLAINTEXT, + format_out, + stream, anAnchor); + if (!rawtarget) { + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + HTAlert(gettext("No target for raw text!")); + return (HT_NOT_LOADED); + } /* Copy routine entry points */ + rawtargetClass = *rawtarget->isa; + } else + /* + * Make a hypertext object with an anchor list. + */ + if (!(post_wanted || reply_wanted || spost_wanted || sreply_wanted)) { + target = HTML_new(anAnchor, format_out, stream); + targetClass = *target->isa; /* Copy routine entry points */ + } + + /* + * Now, let's get a stream setup up from the NewsHost. 
+ */ + for (retries = 0; retries < 2; retries++) { + if (s < 0) { + /* CONNECTING to news host */ + char url[260]; + + if (!strcmp(NewsHREF, STR_NEWS_URL)) { + SnipIn(url, "lose://%.*s/", 9, NewsHost); + } else if (ProxyHREF) { + SnipIn(url, "%.*s", 1, ProxyHREF); + } else { + SnipIn(url, "%.*s", 1, NewsHREF); + } + CTRACE((tfp, "News: doing HTDoConnect on '%s'\n", url)); + + _HTProgress(gettext("Connecting to NewsHost ...")); + +#ifdef USE_SSL + if (!using_proxy && + (!StrNCmp(arg, STR_SNEWS_URL, 6) || + !StrNCmp(arg, "snewspost:", 10) || + !StrNCmp(arg, "snewsreply:", 11))) + status = HTDoConnect(url, "NNTPS", SNEWS_PORT, &s); + else + status = HTDoConnect(url, "NNTP", NEWS_PORT, &s); +#else + status = HTDoConnect(url, "NNTP", NEWS_PORT, &s); +#endif /* USE_SSL */ + + if (status == HT_INTERRUPTED) { + /* + * Interrupt cleanly. + */ + CTRACE((tfp, + "HTNews: Interrupted on connect; recovering cleanly.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); +#ifdef USE_SSL + if (Handle) { + SSL_free(Handle); + Handle = NULL; + } +#endif /* USE_SSL */ + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return HT_NOT_LOADED; + } + if (status < 0) { + NEWS_NETCLOSE(s); + s = -1; + CTRACE((tfp, "HTNews: Unable to connect to news host.\n")); + if (retries < 1) + continue; + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } + HTSprintf0(&dbuf, gettext("Could not access %s."), NewsHost); + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return HTLoadError(stream, 500, dbuf); + } else { + CTRACE((tfp, "HTNews: Connected to news host %s.\n", + NewsHost)); +#ifdef USE_SSL + /* + * If this is an snews url, then do the SSL stuff here + */ + 
if (!using_proxy && + (!StrNCmp(url, "snews", 5) || + !StrNCmp(url, "snewspost:", 10) || + !StrNCmp(url, "snewsreply:", 11))) { + Handle = HTGetSSLHandle(); + SSL_set_fd(Handle, s); + HTSSLInitPRNG(); + status = SSL_connect(Handle); + + if (status <= 0) { + unsigned long SSLerror; + + CTRACE((tfp, + "HTNews: Unable to complete SSL handshake for '%s', SSL_connect=%d, SSL error stack dump follows\n", + url, status)); + SSL_load_error_strings(); + while ((SSLerror = ERR_get_error()) != 0) { + CTRACE((tfp, "HTNews: SSL: %s\n", + ERR_error_string(SSLerror, NULL))); + } + HTAlert("Unable to make secure connection to remote host."); + NEWS_NETCLOSE(s); + s = -1; + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) + (*targetClass._abort) (target, NULL); + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + if (postfile) { +#ifdef VMS + while (remove(postfile) == 0) ; /* loop through all versions */ +#else + remove(postfile); +#endif /* VMS */ + FREE(postfile); + } + return HT_NOT_LOADED; + } + sprintf(SSLprogress, + "Secure %d-bit %s (%s) NNTP connection", + SSL_get_cipher_bits(Handle, NULL), + SSL_get_cipher_version(Handle), + SSL_get_cipher(Handle)); + _HTProgress(SSLprogress); + } +#endif /* USE_SSL */ + HTInitInput(s); /* set up buffering */ + if (proxycmd[0]) { + status = (int) NEWS_NETWRITE(s, proxycmd, (int) strlen(proxycmd)); + CTRACE((tfp, + "HTNews: Proxy command returned status '%d'.\n", + status)); + } + if (((status = response(NULL)) / 100) != 2) { + NEWS_NETCLOSE(s); + s = -1; + if (status == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } + FREE(NewsHost); + FREE(NewsHREF); + FREE(ProxyHost); + FREE(ProxyHREF); + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return (HT_NOT_LOADED); + } + if (retries < 1) + continue; + FREE(ProxyHost); + FREE(ProxyHREF); + 
FREE(ListArg); + FREE(postfile); + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } + if (response_text[0]) { + HTSprintf0(&dbuf, + gettext("Can't read news info. News host %.20s responded: %.200s"), + NewsHost, response_text); + } else { + HTSprintf0(&dbuf, + gettext("Can't read news info, empty response from host %s"), + NewsHost); + } + return HTLoadError(stream, 500, dbuf); + } + if (status == 200) { + HTCanPost = TRUE; + } else { + HTCanPost = FALSE; + if (post_wanted || reply_wanted || + spost_wanted || sreply_wanted) { + HTAlert(CANNOT_POST); + FREE(NewsHREF); + if (ProxyHREF) { + StrAllocCopy(NewsHost, ProxyHost); + FREE(ProxyHost); + FREE(ProxyHREF); + } + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return (HT_NOT_LOADED); + } + } + } + } + /* If needed opening */ + if (post_wanted || reply_wanted || + spost_wanted || sreply_wanted) { + if (!HTCanPost) { + HTAlert(CANNOT_POST); + FREE(NewsHREF); + if (ProxyHREF) { + StrAllocCopy(NewsHost, ProxyHost); + FREE(ProxyHost); + FREE(ProxyHREF); + } + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return (HT_NOT_LOADED); + } + if (postfile == NULL) { + postfile = LYNewsPost(ListArg, + (reply_wanted || sreply_wanted)); + } + if (postfile == NULL) { + HTProgress(CANCELLED); + FREE(NewsHREF); + if (ProxyHREF) { + StrAllocCopy(NewsHost, ProxyHost); + FREE(ProxyHost); + FREE(ProxyHREF); + } + FREE(ListArg); + return (HT_NOT_LOADED); + } + } else { + /* + * Ensure reader mode, but don't bother checking the status for + * anything but HT_INTERRUPTED or a 480 Authorization request, + * because if the reader mode command is not needed, the server + * probably returned a 500, which is irrelevant at this point. 
- + * FM + */ + char buffer[20]; + + sprintf(buffer, "mode reader%c%c", CR, LF); + if ((status = response(buffer)) == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + break; + } + if (status == 480) { + NNTPAuthResult auth_result = HTHandleAuthInfo(NewsHost); + + if (auth_result == NNTPAUTH_CLOSE) { + if (s != -1 && !(ProxyHost || ProxyHREF)) { + NEWS_NETCLOSE(s); + s = -1; + } + } + if (auth_result != NNTPAUTH_OK) { + break; + } + if (response(buffer) == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + break; + } + } + } + + Send_NNTP_command: +#ifdef NEWS_DEB + if (postfile) + printf("postfile = %s, command = %s", postfile, command); + else + printf("command = %s", command); +#endif + if ((status = response(command)) == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + break; + } + if (status < 0) { + if (retries < 1) { + continue; + } else { + break; + } + } + /* + * For some well known error responses which are expected to occur in + * normal use, break from the loop without retrying and without closing + * the connection. It is unlikely that these are leftovers from a + * timed-out connection (but we do some checks to see whether the + * response corresponds to the last command), or that they will give + * anything else when automatically retried. 
- kw + */ + if (status == 411 && group_wanted && + !StrNCmp(command, "GROUP ", 6) && + !strncasecomp(response_text + 3, " No such group ", 15) && + !strcmp(response_text + 18, groupName)) { + + HTAlert(response_text); + break; + } else if (status == 430 && !group_wanted && !list_wanted && + !StrNCmp(command, "ARTICLE <", 9) && + !strcasecomp(response_text + 3, " No such article")) { + + HTAlert(response_text); + break; + } + if ((status / 100) != 2 && + status != 340 && + status != 480) { + if (retries) { + if (list_wanted && !StrNCmp(command, "XGTITLE", 7)) { + sprintf(command, "LIST NEWSGROUPS%c%c", CR, LF); + goto Send_NNTP_command; + } + HTAlert(response_text); + } else { + _HTProgress(response_text); + } + NEWS_NETCLOSE(s); + s = -1; + /* + * Message might be a leftover "Timeout-disconnected", so try again + * if the retries maximum has not been reached. + */ + continue; + } + + /* + * Post or load a group, article, etc + */ + if (status == 480) { + NNTPAuthResult auth_result; + + /* + * Some servers return 480 for a failed XGTITLE. - FM + */ + if (list_wanted && !StrNCmp(command, "XGTITLE", 7) && + strstr(response_text, "uthenticat") == NULL && + strstr(response_text, "uthor") == NULL) { + sprintf(command, "LIST NEWSGROUPS%c%c", CR, LF); + goto Send_NNTP_command; + } + /* + * Handle Authorization. - FM + */ + if ((auth_result = HTHandleAuthInfo(NewsHost)) == NNTPAUTH_OK) { + goto Send_NNTP_command; + } else if (auth_result == NNTPAUTH_CLOSE) { + if (s != -1 && !(ProxyHost || ProxyHREF)) { + NEWS_NETCLOSE(s); + s = -1; + } + if (retries < 1) + continue; + } + status = HT_NOT_LOADED; + } else if (post_wanted || reply_wanted || + spost_wanted || sreply_wanted) { + /* + * Handle posting of an article. - FM + */ + if (status != 340) { + HTAlert(CANNOT_POST); + if (postfile) { + HTSYS_remove(postfile); + } + } else { + post_article(postfile); + } + FREE(postfile); + status = HT_NOT_LOADED; + } else if (list_wanted) { + /* + * List available newsgroups. 
- FM + */ + _HTProgress(gettext("Reading list of available newsgroups.")); + status = read_list(ListArg); + } else if (group_wanted) { + /* + * List articles in a news group. - FM + */ + if (last < 0) { + /* + * We got one article number rather than a range following the + * slash which followed the group name, or the range was zero, + * so now that we have selected that group, load ARTICLE and + * the the number (first) as the command and go back to send it + * and check the response. - FM + */ + sprintf(command, "%s %d%c%c", + head_wanted ? "HEAD" : "ARTICLE", + first, CR, LF); + group_wanted = FALSE; + retries = 2; + goto Send_NNTP_command; + } + _HTProgress(gettext("Reading list of articles in newsgroup.")); + status = read_group(groupName, first, last); + } else { + /* + * Get an article from a news group. - FM + */ + _HTProgress(gettext("Reading news article.")); + status = read_article(anAnchor); + } + if (status == HT_INTERRUPTED) { + _HTProgress(CONNECTION_INTERRUPTED); + status = HT_LOADED; + } + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + if (status == HT_NOT_LOADED) { + ABORT_TARGET; + } else { + FREE_TARGET; + } + } + FREE(NewsHREF); + if (ProxyHREF) { + StrAllocCopy(NewsHost, ProxyHost); + FREE(ProxyHost); + FREE(ProxyHREF); + } + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return status; + } /* Retry loop */ + +#if 0 + HTAlert(gettext("Sorry, could not load requested news.")); + NXRunAlertPanel(NULL, "Sorry, could not load `%s'.", NULL, NULL, NULL, arg); + /* No -- message earlier wil have covered it */ +#endif + + if (!(post_wanted || reply_wanted || + spost_wanted || sreply_wanted)) { + ABORT_TARGET; + } + FREE(NewsHREF); + if (ProxyHREF) { + StrAllocCopy(NewsHost, ProxyHost); + FREE(ProxyHost); + FREE(ProxyHREF); + } + FREE(ListArg); + if (postfile) { + HTSYS_remove(postfile); + FREE(postfile); + } + return HT_NOT_LOADED; +} + +/* + * This function clears all authorization 
information by + * invoking the free_NNTP_AuthInfo() function, which normally + * is invoked at exit. It allows a browser command to do + * this at any time, for example, if the user is leaving + * the terminal for a period of time, but does not want + * to end the current session. - FM + */ +void HTClearNNTPAuthInfo(void) +{ + /* + * Need code to check cached documents and do something to ensure that any + * protected documents no longer can be accessed without a new retrieval. + * - FM + */ + + /* + * Now free all of the authorization info. - FM + */ + free_NNTP_AuthInfo(); +} + +#ifdef USE_SSL +static int HTNewsGetCharacter(void) +{ + if (!Handle) + return HTGetCharacter(); + else + return HTGetSSLCharacter((void *) Handle); +} + +int HTNewsProxyConnect(int sock, + const char *url, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +{ + int status; + const char *arg = url; + char SSLprogress[256]; + + s = channel_s = sock; + Handle = HTGetSSLHandle(); + SSL_set_fd(Handle, s); + HTSSLInitPRNG(); + status = SSL_connect(Handle); + + if (status <= 0) { + unsigned long SSLerror; + + channel_s = -1; + CTRACE((tfp, + "HTNews: Unable to complete SSL handshake for '%s', SSL_connect=%d, SSL error stack dump follows\n", + url, status)); + SSL_load_error_strings(); + while ((SSLerror = ERR_get_error()) != 0) { + CTRACE((tfp, "HTNews: SSL: %s\n", ERR_error_string(SSLerror, NULL))); + } + HTAlert("Unable to make secure connection to remote host."); + NEWS_NETCLOSE(s); + s = -1; + return HT_NOT_LOADED; + } + sprintf(SSLprogress, "Secure %d-bit %s (%s) NNTP connection", + SSL_get_cipher_bits(Handle, NULL), + SSL_get_cipher_version(Handle), + SSL_get_cipher(Handle)); + _HTProgress(SSLprogress); + status = HTLoadNews(arg, anAnchor, format_out, sink); + channel_s = -1; + return status; +} +#endif /* USE_SSL */ + +#ifdef GLOBALDEF_IS_MACRO +#define _HTNEWS_C_1_INIT { "news", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTNews, _HTNEWS_C_1_INIT); +#define 
_HTNEWS_C_2_INIT { "nntp", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTNNTP, _HTNEWS_C_2_INIT); +#define _HTNEWS_C_3_INIT { "newspost", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTNewsPost, _HTNEWS_C_3_INIT); +#define _HTNEWS_C_4_INIT { "newsreply", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTNewsReply, _HTNEWS_C_4_INIT); +#define _HTNEWS_C_5_INIT { "snews", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTSNews, _HTNEWS_C_5_INIT); +#define _HTNEWS_C_6_INIT { "snewspost", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTSNewsPost, _HTNEWS_C_6_INIT); +#define _HTNEWS_C_7_INIT { "snewsreply", HTLoadNews, NULL } +GLOBALDEF(HTProtocol, HTSNewsReply, _HTNEWS_C_7_INIT); +#else +GLOBALDEF HTProtocol HTNews = +{"news", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTNNTP = +{"nntp", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTNewsPost = +{"newspost", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTNewsReply = +{"newsreply", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTSNews = +{"snews", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTSNewsPost = +{"snewspost", HTLoadNews, NULL}; +GLOBALDEF HTProtocol HTSNewsReply = +{"snewsreply", HTLoadNews, NULL}; +#endif /* GLOBALDEF_IS_MACRO */ + +#endif /* not DISABLE_NEWS */ diff --git a/WWW/Library/Implementation/HTNews.h b/WWW/Library/Implementation/HTNews.h new file mode 100644 index 00000000..ef9a6e49 --- /dev/null +++ b/WWW/Library/Implementation/HTNews.h @@ -0,0 +1,60 @@ +/* Network News Transfer protocol module for the WWW library + HTNEWS + + */ +/* History: + * 26 Sep 90 Written TBL in Objective-C + * 29 Nov 91 Downgraded to C, for portable implementation. 
+ */ + +#ifndef HTNEWS_H +#define HTNEWS_H + +#include <HTAccess.h> +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern int HTNewsChunkSize; + extern int HTNewsMaxChunk; + +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTNews); + extern GLOBALREF (HTProtocol, HTNNTP); + extern GLOBALREF (HTProtocol, HTNewsPost); + extern GLOBALREF (HTProtocol, HTNewsReply); + extern GLOBALREF (HTProtocol, HTSNews); + extern GLOBALREF (HTProtocol, HTSNewsPost); + extern GLOBALREF (HTProtocol, HTSNewsReply); + +#else + GLOBALREF HTProtocol HTNews; + GLOBALREF HTProtocol HTNNTP; + GLOBALREF HTProtocol HTNewsPost; + GLOBALREF HTProtocol HTNewsReply; + GLOBALREF HTProtocol HTSNews; + GLOBALREF HTProtocol HTSNewsPost; + GLOBALREF HTProtocol HTSNewsReply; +#endif /* GLOBALREF_IS_MACRO */ + + extern void HTSetNewsHost(const char *value); + extern const char *HTGetNewsHost(void); + extern char *HTNewsHost; + + extern void HTClearNNTPAuthInfo(void); + +#ifdef USE_SSL + extern SSL_CTX *ssl_ctx; + + extern int HTNewsProxyConnect(int sock, + const char *url, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* HTNEWS_H */ diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c new file mode 100644 index 00000000..0338dfd4 --- /dev/null +++ b/WWW/Library/Implementation/HTParse.c @@ -0,0 +1,1348 @@ +/* + * $LynxId: HTParse.c,v 1.70 2012/02/09 19:57:37 tom Exp $ + * + * Parse HyperText Document Address HTParse.c + * ================================ + */ + +#include <HTUtils.h> +#include <HTParse.h> + +#include <LYUtils.h> +#include <LYLeaks.h> +#include <LYStrings.h> +#include <LYCharUtils.h> +#include <LYGlobalDefs.h> + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#else +#ifdef __MINGW32__ +#include <malloc.h> +#endif /* __MINGW32__ */ +#endif + +#ifdef USE_IDNA +#include <idna.h> +#endif + +#define HEX_ESCAPE '%' + +struct struct_parts { + char *access; + char 
*host; + char *absolute; + char *relative; + char *search; /* treated normally as part of path */ + char *anchor; +}; + +#if 0 /* for debugging */ +static void show_parts(const char *name, struct struct_parts *parts, int line) +{ + if (TRACE) { + CTRACE((tfp, "struct_parts(%s) %s@%d\n", name, __FILE__, line)); + CTRACE((tfp, " access '%s'\n", NONNULL(parts->access))); + CTRACE((tfp, " host '%s'\n", NONNULL(parts->host))); + CTRACE((tfp, " absolute '%s'\n", NONNULL(parts->absolute))); + CTRACE((tfp, " relative '%s'\n", NONNULL(parts->relative))); + CTRACE((tfp, " search '%s'\n", NONNULL(parts->search))); + CTRACE((tfp, " anchor '%s'\n", NONNULL(parts->anchor))); + } +} +#define SHOW_PARTS(name) show_parts(#name, &name, __LINE__) +#else +#define SHOW_PARTS(name) /* nothing */ +#endif + +/* Strip white space off a string. HTStrip() + * ------------------------------- + * + * On exit, + * Return value points to first non-white character, or to 0 if none. + * All trailing white space is OVERWRITTEN with zero. + */ +char *HTStrip(char *s) +{ +#define SPACE(c) ((c == ' ') || (c == '\t') || (c == '\n')) + char *p; + + for (p = s; *p; p++) { /* Find end of string */ + ; + } + for (p--; p >= s; p--) { + if (SPACE(*p)) + *p = '\0'; /* Zap trailing blanks */ + else + break; + } + while (SPACE(*s)) + s++; /* Strip leading blanks */ + return s; +} + +/* Scan a filename for its constituents. scan() + * ------------------------------------- + * + * On entry, + * name points to a document name which may be incomplete. + * On exit, + * absolute or relative may be nonzero (but not both). + * host, anchor and access may be nonzero if they were specified. + * Any which are nonzero point to zero terminated strings. 
+ */ +static void scan(char *name, + struct struct_parts *parts) +{ + char *after_access; + char *p; + + parts->access = NULL; + parts->host = NULL; + parts->absolute = NULL; + parts->relative = NULL; + parts->search = NULL; /* normally not used - kw */ + parts->anchor = NULL; + + /* + * Scan left-to-right for a scheme (access). + */ + after_access = name; + for (p = name; *p; p++) { + if (*p == ':') { + *p = '\0'; + parts->access = name; /* Access name has been specified */ + after_access = (p + 1); + break; + } + if (*p == '/' || *p == '#' || *p == ';' || *p == '?') + break; + } + + /* + * Scan left-to-right for a fragment (anchor). + */ + for (p = after_access; *p; p++) { + if (*p == '#') { + parts->anchor = (p + 1); + *p = '\0'; /* terminate the rest */ + break; /* leave things after first # alone - kw */ + } + } + + /* + * Scan left-to-right for a host or absolute path. + */ + p = after_access; + if (*p == '/') { + if (p[1] == '/') { + parts->host = (p + 2); /* host has been specified */ + *p = '\0'; /* Terminate access */ + p = strchr(parts->host, '/'); /* look for end of host name if any */ + if (p != NULL) { + *p = '\0'; /* Terminate host */ + parts->absolute = (p + 1); /* Root has been found */ + } else { + p = strchr(parts->host, '?'); + if (p != NULL) { + *p = '\0'; /* Terminate host */ + parts->search = (p + 1); + } + } + } else { + parts->absolute = (p + 1); /* Root found but no host */ + } + } else { + parts->relative = (*after_access) ? + after_access : NULL; /* NULL for "" */ + } + + /* + * Check schemes that commonly have unescaped hashes. 
+ */ + if (parts->access && parts->anchor && + /* optimize */ strchr("lnsdLNSD", *parts->access) != NULL) { + if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) || + !strcasecomp(parts->access, "nntp") || + !strcasecomp(parts->access, "snews") || + !strcasecomp(parts->access, "news") || + !strcasecomp(parts->access, "data")) { + /* + * Access specified but no host and not a lynxcgi URL, so the + * anchor may not really be one, e.g., news:j462#36487@foo.bar, or + * it's an nntp or snews URL, or news URL with a host. Restore the + * '#' in the address. + */ + /* but only if we have found a path component of which this will + * become part. - kw */ + if (parts->relative || parts->absolute) { + *(parts->anchor - 1) = '#'; + parts->anchor = NULL; + } + } + } +} /*scan */ + +#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS) +#define LYalloca(x) alloca(x) +#define LYalloca_free(x) {} +#else +#define LYalloca(x) malloc(x) +#define LYalloca_free(x) free(x) +#endif + +static char *strchr_or_end(char *string, int ch) +{ + char *result = strchr(string, ch); + + if (result == 0) { + result = string + strlen(string); + } + return result; +} + +/* + * Given a host specification that may end with a port number, e.g., + * foobar:123 + * point to the ':' which begins the ":port" to make it simple to handle the + * substring. + * + * If no port is found (or a syntax error), return null. 
+ */ +char *HTParsePort(char *host, int *portp) +{ + int brackets = 0; + char *result = NULL; + + *portp = 0; + if (host != NULL) { + while (*host != '\0' && result == 0) { + switch (*host++) { + case ':': + if (brackets == 0 && isdigit(UCH(*host))) { + char *next = NULL; + + *portp = (int) strtol(host, &next, 10); + if (next != 0 && next != host && *next == '\0') { + result = (host - 1); + CTRACE((tfp, "HTParsePort %d\n", *portp)); + } + } + break; + case '[': /* for ipv6 */ + ++brackets; + break; + case ']': /* for ipv6 */ + --brackets; + break; + } + } + } + return result; +} + +#ifdef USE_IDNA +static int hex_decode(int ch) +{ + int result = -1; + + if (ch >= '0' && ch <= '9') + result = (ch - '0'); + else if (ch >= 'a' && ch <= 'f') + result = (ch - 'a') + 10; + else if (ch >= 'A' && ch <= 'F') + result = (ch - 'A') + 10; + return result; +} + +/* + * Convert in-place the given hostname to IDNA form. That requires up to 64 + * characters, and we've allowed for that, with MIN_PARSE. 
+ */ +static void convert_to_idna(char *host) +{ + size_t length = strlen(host); + char *endhost = host + length; + char *buffer = malloc(length + 1); + char *output = NULL; + char *src, *dst; + int code; + int hi, lo; + + if (buffer != 0) { + code = TRUE; + for (dst = buffer, src = host; src < endhost; ++dst) { + int ch = *src++; + + if (ch == HEX_ESCAPE) { + if ((src + 1) < endhost + && (hi = hex_decode(src[0])) >= 0 + && (lo = hex_decode(src[1])) >= 0) { + + *dst = (char) ((hi << 4) | lo); + src += 2; + } else { + CTRACE((tfp, "convert_to_idna: `%s' is malformed\n", host)); + code = FALSE; + break; + } + } else { + *dst = (char) ch; + } + } + if (code) { + *dst = '\0'; + code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES); + if (code == IDNA_SUCCESS) { + strcpy(host, output); + } else { + CTRACE((tfp, "convert_to_idna: `%s': %s\n", + buffer, + idna_strerror((Idna_rc) code))); + } + if (output) /* "(free)" to bypass LYLeaks.c */ + (free) (output); + } + free(buffer); + } +} +#define MIN_PARSE 80 +#else +#define MIN_PARSE 8 +#endif + +/* Parse a Name relative to another name. HTParse() + * -------------------------------------- + * + * This returns those parts of a name which are given (and requested) + * substituting bits from the related name where necessary. + * + * On entry, + * aName A filename given + * relatedName A name relative to which aName is to be parsed + * wanted A mask for the bits which are wanted. 
+ * + * On exit, + * returns A pointer to a malloc'd string which MUST BE FREED + */ +char *HTParse(const char *aName, + const char *relatedName, + int wanted) +{ + char *result = NULL; + char *tail = NULL; /* a pointer to the end of the 'result' string */ + char *return_value = NULL; + size_t len, len1, len2; + size_t need; + char *name = NULL; + char *rel = NULL; + char *p, *q; + char *acc_method; + struct struct_parts given, related; + + CTRACE((tfp, "HTParse: aName:`%s'\n", aName)); + CTRACE((tfp, " relatedName:`%s'\n", relatedName)); + + if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */ + if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY)) + == (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */ + wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */ + if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */ + wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */ + } +/* *INDENT-OFF* */ + CTRACE((tfp, " want:%s%s%s%s%s%s%s\n", + wanted & PARSE_PUNCTUATION ? " punc" : "", + wanted & PARSE_ANCHOR ? " anchor" : "", + wanted & PARSE_PATH ? " path" : "", + wanted & PARSE_HOST ? " host" : "", + wanted & PARSE_ACCESS ? " access" : "", + wanted & PARSE_STRICTPATH ? " PATH" : "", + wanted & PARSE_QUERY ? " QUERY" : "")); +/* *INDENT-ON* */ + + /* + * Allocate the temporary string. Optimized. + */ + len1 = strlen(aName) + 1; + len2 = strlen(relatedName) + 1; + len = len1 + len2 + MIN_PARSE; /* Lots of space: more than enough */ + + need = (len * 2 + len1 + len2); + if (need > (size_t) max_uri_size || + (int) need < (int) len1 || + (int) need < (int) len2) + return StrAllocCopy(return_value, ""); + + result = tail = (char *) LYalloca(need); + if (result == NULL) { + outofmem(__FILE__, "HTParse"); + + assert(result != NULL); + } + *result = '\0'; + name = result + len; + rel = name + len1; + + /* + * Make working copy of the input string to cut up. 
+ */ + MemCpy(name, aName, len1); + + /* + * Cut up the string into URL fields. + */ + scan(name, &given); + SHOW_PARTS(given); + + /* + * Now related string. + */ + if ((given.access && given.host && given.absolute) || !*relatedName) { + /* + * Inherit nothing! + */ + related.access = NULL; + related.host = NULL; + related.absolute = NULL; + related.relative = NULL; + related.search = NULL; + related.anchor = NULL; + } else { + MemCpy(rel, relatedName, len2); + scan(rel, &related); + } + SHOW_PARTS(related); + + /* + * Handle the scheme (access) field. + */ + if (given.access && given.host && !given.relative && !given.absolute) { + if (!strcmp(given.access, "http") || + !strcmp(given.access, "https") || + !strcmp(given.access, "ftp")) { + + /* + * Assume root. + */ + given.absolute = empty_string; + } + } + acc_method = given.access ? given.access : related.access; + if (wanted & PARSE_ACCESS) { + if (acc_method) { + strcpy(tail, acc_method); + tail += strlen(tail); + if (wanted & PARSE_PUNCTUATION) { + *tail++ = ':'; + *tail = '\0'; + } + } + } + + /* + * If different schemes, inherit nothing. + * + * We'll try complying with RFC 1808 and the Fielding draft, and inherit + * nothing if both schemes are given, rather than only when they differ, + * except for file URLs - FM + * + * After trying it for a while, it's still premature, IHMO, to go along + * with it, so this is back to inheriting for identical schemes whether or + * not they are "file". If you want to try it again yourself, uncomment + * the strcasecomp() below. - FM + */ + if ((given.access && related.access) && + ( /* strcasecomp(given.access, "file") || */ + strcmp(given.access, related.access))) { + related.host = NULL; + related.absolute = NULL; + related.relative = NULL; + related.search = NULL; + related.anchor = NULL; + } + + /* + * Handle the host field. 
+ */ + if (wanted & PARSE_HOST) { + if (given.host || related.host) { + if (wanted & PARSE_PUNCTUATION) { + *tail++ = '/'; + *tail++ = '/'; + } + strcpy(tail, given.host ? given.host : related.host); +#define CLEAN_URLS +#ifdef CLEAN_URLS + /* + * Ignore default port numbers, and trailing dots on FQDNs, which + * will only cause identical addresses to look different. (related + * is already a clean url). + */ + { + char *p2, *h; + int portnumber; + + if ((p2 = strchr(result, '@')) != NULL) + tail = (p2 + 1); + p2 = HTParsePort(result, &portnumber); + if (p2 != NULL && acc_method != NULL) { + /* + * Port specified. + */ +#define ACC_METHOD(a,b) (!strcmp(acc_method, a) && (portnumber == b)) + if (ACC_METHOD("http", 80) || + ACC_METHOD("https", 443) || + ACC_METHOD("gopher", 70) || + ACC_METHOD("ftp", 21) || + ACC_METHOD("wais", 210) || + ACC_METHOD("nntp", 119) || + ACC_METHOD("news", 119) || + ACC_METHOD("newspost", 119) || + ACC_METHOD("newsreply", 119) || + ACC_METHOD("snews", 563) || + ACC_METHOD("snewspost", 563) || + ACC_METHOD("snewsreply", 563) || + ACC_METHOD("finger", 79) || + ACC_METHOD("telnet", 23) || + ACC_METHOD("tn3270", 23) || + ACC_METHOD("rlogin", 513) || + ACC_METHOD("cso", 105)) + *p2 = '\0'; /* It is the default: ignore it */ + } + if (p2 == NULL) { + int len3 = (int) strlen(tail); + + if (len3 > 0) { + h = tail + len3 - 1; /* last char of hostname */ + if (*h == '.') + *h = '\0'; /* chop final . */ + } + } else if (p2 != result) { + h = p2; + h--; /* End of hostname */ + if (*h == '.') { + /* + * Slide p2 over h. + */ + while (*p2 != '\0') + *h++ = *p2++; + *h = '\0'; /* terminate */ + } + } + } +#ifdef USE_IDNA + /* + * Depending on locale-support, we could have a literal UTF-8 + * string as a host name, or a URL-encoded form of that. + */ + convert_to_idna(tail); +#endif +#endif /* CLEAN_URLS */ + } + } + + /* + * Trim any blanks from the result so far - there's no excuse for blanks + * in a hostname. Also update the tail here. 
+ */ + tail = LYRemoveBlanks(result); + + /* + * If host in given or related was ended directly with a '?' (no slash), + * fake the search part into absolute. This is the only case search is + * returned from scan. A host must have been present. this restores the + * '?' at which the host part had been truncated in scan, we have to do + * this after host part handling is done. - kw + */ + if (given.search && *(given.search - 1) == '\0') { + given.absolute = given.search - 1; + given.absolute[0] = '?'; + } else if (related.search && !related.absolute && + *(related.search - 1) == '\0') { + related.absolute = related.search - 1; + related.absolute[0] = '?'; + } + + /* + * If different hosts, inherit no path. + */ + if (given.host && related.host) + if (strcmp(given.host, related.host) != 0) { + related.absolute = NULL; + related.relative = NULL; + related.anchor = NULL; + } + + /* + * Handle the path. + */ + if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) { + int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY)); + + if (acc_method && !given.absolute && given.relative) { + /* + * Treat all given nntp or snews paths, or given paths for news + * URLs with a host, as absolute. 
+ */ + switch (*acc_method) { + case 'N': + case 'n': + if (!strcasecomp(acc_method, "nntp") || + (!strcasecomp(acc_method, "news") && + !strncasecomp(result, "news://", 7))) { + given.absolute = given.relative; + given.relative = NULL; + } + break; + case 'S': + case 's': + if (!strcasecomp(acc_method, "snews")) { + given.absolute = given.relative; + given.relative = NULL; + } + break; + } + } + + if (given.absolute) { /* All is given */ + if (wanted & PARSE_PUNCTUATION) + *tail++ = '/'; + strcpy(tail, given.absolute); + CTRACE((tfp, "HTParse: (ABS)\n")); + } else if (related.absolute) { /* Adopt path not name */ + char *base = tail; + + *tail++ = '/'; + strcpy(tail, related.absolute); + if (given.relative) { + /* RFC 1808 part 4 step 5 (if URL path is empty) */ + /* a) if given has params, add/replace that */ + if (given.relative[0] == ';') { + strcpy(strchr_or_end(tail, ';'), given.relative); + } + /* b) if given has query, add/replace that */ + else if (given.relative[0] == '?') { + strcpy(strchr_or_end(tail, '?'), given.relative); + } + /* otherwise fall through to RFC 1808 part 4 step 6 */ + else { + p = strchr(tail, '?'); /* Search part? */ + if (p == NULL) + p = (tail + strlen(tail) - 1); + for (; *p != '/'; p--) ; /* last / */ + p[1] = '\0'; /* Remove filename */ + strcat(p, given.relative); /* Add given one */ + } + HTSimplify(base); + if (*base == '\0') + strcpy(base, "/"); + } + CTRACE((tfp, "HTParse: (Related-ABS)\n")); + } else if (given.relative) { + strcpy(tail, given.relative); /* what we've got */ + CTRACE((tfp, "HTParse: (REL)\n")); + } else if (related.relative) { + strcpy(tail, related.relative); + CTRACE((tfp, "HTParse: (Related-REL)\n")); + } else { /* No inheritance */ + if (!isLYNXCGI(aName) && + !isLYNXEXEC(aName) && + !isLYNXPROG(aName)) { + *tail++ = '/'; + *tail = '\0'; + } + if (!strcmp(result, "news:/")) + result[5] = '*'; + CTRACE((tfp, "HTParse: (No inheritance)\n")); + } + if (want_detail) { + p = strchr(tail, '?'); /* Search part? 
*/ + if (p) { + if (PARSE_STRICTPATH) { + *p = '\0'; + } else { + if (!(wanted & PARSE_PUNCTUATION)) + p++; + do { + *tail++ = *p; + } while (*p++); + } + } else { + if (wanted & PARSE_QUERY) + *tail = '\0'; + } + } + } + + /* + * Handle the fragment (anchor). Never inherit. + */ + if (wanted & PARSE_ANCHOR) { + if (given.anchor && *given.anchor) { + tail += strlen(tail); + if (wanted & PARSE_PUNCTUATION) + *tail++ = '#'; + strcpy(tail, given.anchor); + } + } + + /* + * If there are any blanks remaining in the string, escape them as needed. + * See the discussion in LYLegitimizeHREF() for example. + */ + if ((p = strchr(result, ' ')) != 0) { + switch (is_url(result)) { + case UNKNOWN_URL_TYPE: + CTRACE((tfp, "HTParse: ignore:`%s'\n", result)); + break; + case LYNXEXEC_URL_TYPE: + case LYNXPROG_URL_TYPE: + case LYNXCGI_URL_TYPE: + case LYNXPRINT_URL_TYPE: + case LYNXHIST_URL_TYPE: + case LYNXDOWNLOAD_URL_TYPE: + case LYNXKEYMAP_URL_TYPE: + case LYNXIMGMAP_URL_TYPE: + case LYNXCOOKIE_URL_TYPE: + case LYNXCACHE_URL_TYPE: + case LYNXDIRED_URL_TYPE: + case LYNXOPTIONS_URL_TYPE: + case LYNXCFG_URL_TYPE: + case LYNXCOMPILE_OPTS_URL_TYPE: + case LYNXMESSAGES_URL_TYPE: + CTRACE((tfp, "HTParse: spaces:`%s'\n", result)); + break; + case NOT_A_URL_TYPE: + default: + CTRACE((tfp, "HTParse: encode:`%s'\n", result)); + do { + q = p + strlen(p) + 2; + + while (q != p + 1) { + q[0] = q[-2]; + --q; + } + p[0] = HEX_ESCAPE; + p[1] = '2'; + p[2] = '0'; + } while ((p = strchr(result, ' ')) != 0); + break; + } + } + CTRACE((tfp, "HTParse: result:`%s'\n", result)); + + StrAllocCopy(return_value, result); + LYalloca_free(result); + + /* FIXME: could be optimized using HTParse() internals */ + if (*relatedName && + ((wanted & PARSE_ALL_WITHOUT_ANCHOR) == PARSE_ALL_WITHOUT_ANCHOR)) { + /* + * Check whether to fill in localhost. 
- FM + */ + LYFillLocalFileURL(&return_value, relatedName); + CTRACE((tfp, "pass LYFillLocalFile:`%s'\n", return_value)); + } + + return return_value; /* exactly the right length */ +} + +/* HTParseAnchor(), fast HTParse() specialization + * ---------------------------------------------- + * + * On exit, + * returns A pointer within input string (probably to its end '\0') + */ +const char *HTParseAnchor(const char *aName) +{ + const char *p = aName; + + for (; *p && *p != '#'; p++) { + ; + } + if (*p == '#') { + /* the safe way based on HTParse() - + * keeping in mind scan() peculiarities on schemes: + */ + struct struct_parts given; + size_t need = ((unsigned) ((p - aName) + (int) strlen(p) + 1)); + char *name; + + if (need > (size_t) max_uri_size) { + p += strlen(p); + } else { + name = (char *) LYalloca(need); + + if (name == NULL) { + outofmem(__FILE__, "HTParseAnchor"); + + assert(name != NULL); + } + strcpy(name, aName); + scan(name, &given); + LYalloca_free(name); + + p++; /*next to '#' */ + if (given.anchor == NULL) { + for (; *p; p++) /*scroll to end '\0' */ + ; + } + } + } + return p; +} + +/* Simplify a filename. HTSimplify() + * -------------------- + * + * A unix-style file is allowed to contain the sequence xxx/../ which may + * be replaced by "" , and the sequence "/./" which may be replaced by "/". + * Simplification helps us recognize duplicate filenames. + * + * Thus, /etc/junk/../fred becomes /etc/fred + * /etc/junk/./fred becomes /etc/junk/fred + * + * but we should NOT change + * http://fred.xxx.edu/../.. + * + * or ../../albert.html + */ +void HTSimplify(char *filename) +{ + char *p; + char *q, *q1; + + if (filename == NULL) + return; + + if (!(filename[0] && filename[1]) || + filename[0] == '?' || filename[1] == '?' 
|| filename[2] == '?') + return; + + if (strchr(filename, '/') != NULL) { + for (p = (filename + 2); *p; p++) { + if (*p == '?') { + /* + * We're still treating a ?searchpart as part of the path in + * HTParse() and scan(), but if we encounter a '?' here, assume + * it's the delimiter and break. We also could check for a + * parameter delimiter (';') here, but the current Fielding + * draft (wisely or ill-advisedly :) says that it should be + * ignored and collapsing be allowed in it's value). The only + * defined parameter at present is ;type=[A, I, or D] for ftp + * URLs, so if there's a "/..", "/../", "/./", or terminal '.' + * following the ';', it must be due to the ';' being an + * unescaped path character and not actually a parameter + * delimiter. - FM + */ + break; + } + if (*p == '/') { + if ((p[1] == '.') && (p[2] == '.') && + (p[3] == '/' || p[3] == '?' || p[3] == '\0')) { + /* + * Handle "../", "..?" or "..". + */ + for (q = (p - 1); (q >= filename) && (*q != '/'); q--) + /* + * Back up to previous slash or beginning of string. + */ + ; + if ((q[0] == '/') && + (StrNCmp(q, "/../", 4) && + StrNCmp(q, "/..?", 4)) && + !((q - 1) > filename && q[-1] == '/')) { + /* + * Not at beginning of string or in a host field, so + * remove the "/xxx/..". + */ + q1 = (p + 3); + p = q; + while (*q1 != '\0') + *p++ = *q1++; + *p = '\0'; /* terminate */ + /* + * Start again with previous slash. + */ + p = (q - 1); + } + } else if (p[1] == '.' && p[2] == '/') { + /* + * Handle "./" by removing both characters. + */ + q = p; + q1 = (p + 2); + while (*q1 != '\0') + *q++ = *q1++; + *q = '\0'; /* terminate */ + p--; + } else if (p[1] == '.' && p[2] == '?') { + /* + * Handle ".?" by removing the dot. + */ + q = (p + 1); + q1 = (p + 2); + while (*q1 != '\0') + *q++ = *q1++; + *q = '\0'; /* terminate */ + p--; + } else if (p[1] == '.' && p[2] == '\0') { + /* + * Handle terminal "." by removing the character. + */ + p[1] = '\0'; + } + } + } + if (p >= filename + 2 && *p == '?' 
&& *(p - 1) == '.') { + if (*(p - 2) == '/') { + /* + * Handle "/.?" by removing the dot. + */ + q = p - 1; + q1 = p; + while (*q1 != '\0') + *q++ = *q1++; + *q = '\0'; + } else if (*(p - 2) == '.' && + p >= filename + 4 && *(p - 3) == '/' && + (*(p - 4) != '/' || + (p > filename + 4 && *(p - 5) != ':'))) { + /* + * Handle "xxx/..?" + */ + for (q = (p - 4); (q > filename) && (*q != '/'); q--) + /* + * Back up to previous slash or beginning of string. + */ + ; + if (*q == '/') { + if (q > filename && *(q - 1) == '/' && + !(q > filename + 1 && *(q - 1) != ':')) + return; + q++; + } + if (StrNCmp(q, "../", 3) && StrNCmp(q, "./", 2)) { + /* + * Not after "//" at beginning of string or after "://", + * and xxx is not ".." or ".", so remove the "xxx/..". + */ + q1 = p; + p = q; + while (*q1 != '\0') + *p++ = *q1++; + *p = '\0'; /* terminate */ + } + } + } + } +} + +/* Make Relative Name. HTRelative() + * ------------------- + * + * This function creates and returns a string which gives an expression of + * one address as related to another. Where there is no relation, an absolute + * address is returned. + * + * On entry, + * Both names must be absolute, fully qualified names of nodes + * (no anchor bits) + * + * On exit, + * The return result points to a newly allocated name which, if + * parsed by HTParse relative to relatedName, will yield aName. + * The caller is responsible for freeing the resulting name later. 
+ * + */ +char *HTRelative(const char *aName, + const char *relatedName) +{ + char *result = NULL; + const char *p = aName; + const char *q = relatedName; + const char *after_access = NULL; + const char *path = NULL; + const char *last_slash = NULL; + int slashes = 0; + + for (; *p; p++, q++) { /* Find extent of match */ + if (*p != *q) + break; + if (*p == ':') + after_access = p + 1; + if (*p == '/') { + last_slash = p; + slashes++; + if (slashes == 3) + path = p; + } + } + + /* q, p point to the first non-matching character or zero */ + + if (!after_access) { /* Different access */ + StrAllocCopy(result, aName); + } else if (slashes < 3) { /* Different nodes */ + StrAllocCopy(result, after_access); + } else if (slashes == 3) { /* Same node, different path */ + StrAllocCopy(result, path); + } else { /* Some path in common */ + unsigned levels = 0; + + for (; *q && (*q != '#'); q++) + if (*q == '/') + levels++; + result = typecallocn(char, 3 * levels + strlen(last_slash) + 1); + + if (result == NULL) + outofmem(__FILE__, "HTRelative"); + + assert(result != NULL); + + result[0] = '\0'; + for (; levels; levels--) + strcat(result, "../"); + strcat(result, last_slash + 1); + } + CTRACE((tfp, + "HTparse: `%s' expressed relative to\n `%s' is\n `%s'.\n", + aName, relatedName, result)); + return result; +} + +#define AlloCopy(next,base,extra) \ + typecallocn(char, ((next - base) + ((int) extra))) + +/* Escape undesirable characters using % HTEscape() + * ------------------------------------- + * + * This function takes a pointer to a string in which + * some characters may be unacceptable unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits. + * + * Unlike HTUnEscape(), this routine returns a calloc'd string. + */ +/* *INDENT-OFF* */ +static const unsigned char isAcceptable[96] = + +/* Bit 0 xalpha -- see HTFile.h + * Bit 1 xpalpha -- as xalpha but with plus. + * Bit 2 ... 
path -- as xpalphas but with / + */ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + { 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */ + 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */ + 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */ + 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */ + 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{|}~ DEL */ +/* *INDENT-ON* */ + +static const char *hex = "0123456789ABCDEF"; + +#define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask)) + +char *HTEscape(const char *str, + unsigned mask) +{ + const char *p; + char *q; + char *result; + size_t unacceptable = 0; + + for (p = str; *p; p++) + if (!ACCEPTABLE(UCH(TOASCII(*p)))) + unacceptable++; + result = AlloCopy(p, str, (unacceptable * 2) + 1); + + if (result == NULL) + outofmem(__FILE__, "HTEscape"); + + assert(result != NULL); + + for (q = result, p = str; *p; p++) { + unsigned char a = UCH(TOASCII(*p)); + + if (!ACCEPTABLE(a)) { + *q++ = HEX_ESCAPE; /* Means hex coming */ + *q++ = hex[a >> 4]; + *q++ = hex[a & 15]; + } else + *q++ = *p; + } + *q = '\0'; /* Terminate */ + return result; +} + +/* Escape unsafe characters using % HTEscapeUnsafe() + * -------------------------------- + * + * This function takes a pointer to a string in which + * some characters may be that may be unsafe are unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits. + * + * Unlike HTUnEscape(), this routine returns a malloc'd string. 
+ */ +#define UNSAFE(ch) (((ch) <= 32) || ((ch) >= 127)) + +char *HTEscapeUnsafe(const char *str) +{ + const char *p; + char *q; + char *result; + size_t unacceptable = 0; + + for (p = str; *p; p++) + if (UNSAFE(UCH(TOASCII(*p)))) + unacceptable++; + result = AlloCopy(p, str, (unacceptable * 2) + 1); + + if (result == NULL) + outofmem(__FILE__, "HTEscapeUnsafe"); + + assert(result != NULL); + + for (q = result, p = str; *p; p++) { + unsigned char a = UCH(TOASCII(*p)); + + if (UNSAFE(a)) { + *q++ = HEX_ESCAPE; /* Means hex coming */ + *q++ = hex[a >> 4]; + *q++ = hex[a & 15]; + } else + *q++ = *p; + } + *q = '\0'; /* Terminate */ + return result; +} + +/* Escape undesirable characters using % but space to +. HTEscapeSP() + * ----------------------------------------------------- + * + * This function takes a pointer to a string in which + * some characters may be unacceptable unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits, + * except that spaces are converted to '+' instead of %2B. + * + * Unlike HTUnEscape(), this routine returns a calloced string. + */ +char *HTEscapeSP(const char *str, + unsigned mask) +{ + const char *p; + char *q; + char *result; + size_t unacceptable = 0; + + for (p = str; *p; p++) + if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p))))) + unacceptable++; + result = AlloCopy(p, str, (unacceptable * 2) + 1); + + if (result == NULL) + outofmem(__FILE__, "HTEscape"); + + assert(result != NULL); + + for (q = result, p = str; *p; p++) { + unsigned char a = UCH(TOASCII(*p)); + + if (a == 32) { + *q++ = '+'; + } else if (!ACCEPTABLE(a)) { + *q++ = HEX_ESCAPE; /* Means hex coming */ + *q++ = hex[a >> 4]; + *q++ = hex[a & 15]; + } else { + *q++ = *p; + } + } + *q = '\0'; /* Terminate */ + return result; +} + +/* Decode %xx escaped characters. 
HTUnEscape() + * ------------------------------ + * + * This function takes a pointer to a string in which some + * characters may have been encoded in %xy form, where xy is + * the ASCII hex code for character 16x+y. + * The string is converted in place, as it will never grow. + */ +static char from_hex(int c) +{ + return (char) (c >= '0' && c <= '9' ? c - '0' + : c >= 'A' && c <= 'F' ? c - 'A' + 10 + : c - 'a' + 10); /* accept small letters just in case */ +} + +char *HTUnEscape(char *str) +{ + char *p = str; + char *q = str; + + if (!(p && *p)) + return str; + + while (*p != '\0') { + if (*p == HEX_ESCAPE && + /* + * Tests shouldn't be needed, but better safe than sorry. + */ + p[1] && p[2] && + isxdigit(UCH(p[1])) && + isxdigit(UCH(p[2]))) { + p++; + if (*p) + *q = (char) (from_hex(*p++) * 16); + if (*p) { + /* + * Careful! FROMASCII() may evaluate its arg more than once! + */ + /* S/390 -- gil -- 0221 */ + *q = (char) (*q + from_hex(*p++)); + } + *q = FROMASCII(*q); + q++; + } else { + *q++ = *p++; + } + } + + *q = '\0'; + return str; + +} /* HTUnEscape */ + +/* Decode some %xx escaped characters. HTUnEscapeSome() + * ----------------------------------- Klaus Weide + * (kweide@tezcat.com) + * This function takes a pointer to a string in which some + * characters may have been encoded in %xy form, where xy is + * the ASCII hex code for character 16x+y, and a pointer to + * a second string containing one or more characters which + * should be unescaped if escaped in the first string. + * The first string is converted in place, as it will never grow. + */ +char *HTUnEscapeSome(char *str, + const char *do_trans) +{ + char *p = str; + char *q = str; + char testcode; + + if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0') + return str; + + while (*p != '\0') { + if (*p == HEX_ESCAPE && + p[1] && p[2] && /* tests shouldn't be needed, but.. 
*/ + isxdigit(UCH(p[1])) && + isxdigit(UCH(p[2])) && + (testcode = (char) FROMASCII(from_hex(p[1]) * 16 + + from_hex(p[2]))) && /* %00 no good */ + strchr(do_trans, testcode)) { /* it's one of the ones we want */ + *q++ = testcode; + p += 3; + } else { + *q++ = *p++; + } + } + + *q = '\0'; + return str; + +} /* HTUnEscapeSome */ +/* *INDENT-OFF* */ +static const unsigned char crfc[96] = + +/* Bit 0 xalpha -- need "quoting" + * Bit 1 xpalpha -- need \escape if quoted + */ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + { 1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0, /* 2x !"#$%&'()*+,-./ */ + 0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0, /* 3x 0123456789:;<=>? */ + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4x @ABCDEFGHIJKLMNO */ + 0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0, /* 5X PQRSTUVWXYZ[\]^_ */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6x `abcdefghijklmno */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }; /* 7X pqrstuvwxyz{|}~ DEL */ +/* *INDENT-ON* */ + +#define ASCII_TAB '\011' +#define ASCII_LF '\012' +#define ASCII_CR '\015' +#define ASCII_SPC '\040' +#define ASCII_BAK '\134' + +/* + * Turn a string which is not a RFC 822 token into a quoted-string. - KW + * The "quoted" parameter tells whether we need the beginning/ending quote + * marks. If not, the caller will provide them -TD + */ +void HTMake822Word(char **str, + int quoted) +{ + const char *p; + char *q; + char *result; + unsigned char a; + unsigned added = 0; + + if (isEmpty(*str)) { + StrAllocCopy(*str, quoted ? 
"\"\"" : ""); + return; + } + for (p = *str; *p; p++) { + a = UCH(TOASCII(*p)); /* S/390 -- gil -- 0240 */ + if (a < 32 || a >= 128 || + ((crfc[a - 32]) & 1)) { + if (!added) + added = 2; + if (a >= 160 || a == '\t') + continue; + if (a == '\r' || a == '\n') + added += 2; + else if ((a & 127) < 32 || ((crfc[a - 32]) & 2)) + added++; + } + } + if (!added) + return; + result = AlloCopy(p, *str, added + 1); + if (result == NULL) + outofmem(__FILE__, "HTMake822Word"); + + assert(result != NULL); + + q = result; + if (quoted) + *q++ = '"'; + /* + * Having converted the character to ASCII, we can't use symbolic + * escape codes, since they're in the host character set, which + * is not necessarily ASCII. Thus we use octal escape codes instead. + * -- gil (Paul Gilmartin) <pg@sweng.stortek.com> + */ + /* S/390 -- gil -- 0268 */ + for (p = *str; *p; p++) { + a = UCH(TOASCII(*p)); + if ((a != ASCII_TAB) && + ((a & 127) < ASCII_SPC || + (a < 128 && ((crfc[a - 32]) & 2)))) + *q++ = ASCII_BAK; + *q++ = *p; + if (a == ASCII_LF || + (a == ASCII_CR && (TOASCII(*(p + 1)) != ASCII_LF))) + *q++ = ' '; + } + if (quoted) + *q++ = '"'; + *q = '\0'; /* Terminate */ + FREE(*str); + *str = result; +} diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h new file mode 100644 index 00000000..3f427c41 --- /dev/null +++ b/WWW/Library/Implementation/HTParse.h @@ -0,0 +1,202 @@ +/* + * $LynxId: HTParse.h,v 1.21 2010/09/24 22:45:23 tom Exp $ + * HTParse: URL parsing in the WWW Library + * HTPARSE + * + * This module of the WWW library contains code to parse URLs and various + * related things. + * Implemented by HTParse.c . + */ +#ifndef HTPARSE_H +#define HTPARSE_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + * The following are flag bits which may be ORed together to form + * a number to give the 'wanted' argument to HTParse. 
+ */ +#define PARSE_ACCESS 16 /* the access scheme */ +#define PARSE_HOST 8 /* the host, with any port */ +#define PARSE_PATH 4 /* the path, including any ?query */ +#define PARSE_ANCHOR 2 /* the fragment after '#' */ +#define PARSE_PUNCTUATION 1 /* also emit the ':' "//" '/' '#' delimiters */ +#define PARSE_ALL 31 /* all five bits above */ +#define PARSE_ALL_WITHOUT_ANCHOR (PARSE_ALL ^ PARSE_ANCHOR) +/* + * Additional flag bits for more details on components already + * covered by the above. The PARSE_PATH above doesn't really + * strictly refer to the path component in the sense of the URI + * specs only, but rather to that combined with a possible query + * component. - kw + * HTParse() treats both bits together, or either bit combined with + * PARSE_PATH, as plain PARSE_PATH. + */ +#define PARSE_STRICTPATH 32 /* the path without any ?query */ +#define PARSE_QUERY 64 /* the query only */ +/* + * The following are valid mask values. The terms are the BNF names + * in the URL document. + */ +#define URL_XALPHAS UCH(1) +#define URL_XPALPHAS UCH(2) +#define URL_PATH UCH(4) +/* Strip white space off a string. HTStrip() + * ------------------------------- + * + * On exit, + * Return value points to first non-white character, or to 0 if none. + * All trailing white space is OVERWRITTEN with zero. + */ extern char *HTStrip(char *s); + +/* + * Parse a port number + * ------------------- + * + * On entry, + * host A pointer to hostname possibly followed by port + * + * On exit, + * returns A pointer to the ":" before the port, or NULL if the + * string does not end with a valid ":port" + * sets the port number via the pointer portp. + */ + extern char *HTParsePort(char *host, int *portp); + +/* Parse a Name relative to another name. HTParse() + * -------------------------------------- + * + * This returns those parts of a name which are given (and requested) + * substituting bits from the related name where necessary. + * + * On entry, + * aName A filename given + * relatedName A name relative to which aName is to be parsed + * wanted A mask for the bits which are wanted. 
+ * + * On exit, + * returns A pointer to a malloc'd string which MUST BE FREED + */ + extern char *HTParse(const char *aName, + const char *relatedName, + int wanted); + +/* HTParseAnchor(), fast HTParse() specialization + * ---------------------------------------------- + * + * On exit, + * returns A pointer within input string (probably to its end '\0') + */ + extern const char *HTParseAnchor(const char *aName); + +/* Simplify a filename. HTSimplify() + * -------------------- + * + * A unix-style file is allowed to contain the sequence xxx/../ which may + * be replaced by "" , and the sequence "/./" which may be replaced by "/". + * Simplification helps us recognize duplicate filenames. + * + * Thus, /etc/junk/../fred becomes /etc/fred + * /etc/junk/./fred becomes /etc/junk/fred + * + * but we should NOT change + * http://fred.xxx.edu/../.. + * + * or ../../albert.html + * + * The string is modified in place. + */ + extern void HTSimplify(char *filename); + +/* Make Relative Name. HTRelative() + * ------------------- + * + * This function creates and returns a string which gives an expression of + * one address as related to another. Where there is no relation, an absolute + * address is returned. + * + * On entry, + * Both names must be absolute, fully qualified names of nodes + * (no anchor bits) + * + * On exit, + * The return result points to a newly allocated name which, if + * parsed by HTParse relative to relatedName, will yield aName. + * The caller is responsible for freeing the resulting name later. + * + */ + extern char *HTRelative(const char *aName, + const char *relatedName); + +/* Escape undesirable characters using % HTEscape() + * ------------------------------------- + * + * This function takes a pointer to a string in which + * some characters may be unacceptable unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits. + * + * Unlike HTUnEscape(), this routine returns a malloc'd string. 
+ */ + extern char *HTEscape(const char *str, + unsigned mask); + +/* Escape unsafe characters using % HTEscapeUnsafe() + * -------------------------------- + * + * This function takes a pointer to a string in which + * some characters that may be unsafe are unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits. + * + * Unlike HTUnEscape(), this routine returns a malloc'd string. + */ + extern char *HTEscapeUnsafe(const char *str); + +/* Escape undesirable characters using % but space to +. HTEscapeSP() + * ----------------------------------------------------- + * + * This function takes a pointer to a string in which + * some characters may be unacceptable unescaped. + * It returns a string which has these characters + * represented by a '%' character followed by two hex digits, + * except that spaces are converted to '+' instead of %20. + * + * Unlike HTUnEscape(), this routine returns a malloc'd string. + */ + extern char *HTEscapeSP(const char *str, + unsigned mask); + +/* Decode %xx escaped characters. HTUnEscape() + * ------------------------------ + * + * This function takes a pointer to a string in which some + * characters may have been encoded in %xy form, where xy is + * the ASCII hex code for character 16x+y. + * The string is converted in place, as it will never grow. + */ + extern char *HTUnEscape(char *str); + +/* Decode some %xx escaped characters. HTUnEscapeSome() + * ----------------------------------- Klaus Weide + * (kweide@tezcat.com) + * This function takes a pointer to a string in which some + * characters may have been encoded in %xy form, where xy is + * the ASCII hex code for character 16x+y, and a pointer to + * a second string containing one or more characters which + * should be unescaped if escaped in the first string. + * The first string is converted in place, as it will never grow. 
+ */ + extern char *HTUnEscapeSome(char *str, + const char *do_trans); + +/* + * Turn a string which is not a RFC 822 token into a quoted-string. - KW + */ + extern void HTMake822Word(char **str, + int quoted); + +#ifdef __cplusplus +} +#endif +#endif /* HTPARSE_H */ diff --git a/WWW/Library/Implementation/HTPlain.c b/WWW/Library/Implementation/HTPlain.c new file mode 100644 index 00000000..b3e80c6f --- /dev/null +++ b/WWW/Library/Implementation/HTPlain.c @@ -0,0 +1,722 @@ +/* + * $LynxId: HTPlain.c,v 1.49 2011/06/11 12:09:07 tom Exp $ + * + * Plain text object HTWrite.c + * ================= + * + * This version of the stream object just writes to a socket. + * The socket is assumed open and left open. + * + * Bugs: + * strings written must be less than buffer size. + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> +#include <LYCharVals.h> /* S/390 -- gil -- 0288 */ + +#include <HTPlain.h> + +#include <HTChunk.h> +#include <HText.h> +#include <HTStyle.h> +#define Lynx_HTML_Handler +#include <HTML.h> /* styles[] */ + +#define BUFFER_SIZE 4096; /* Tradeoff */ + +#include <HTMLDTD.h> +#include <HTCJK.h> +#include <UCMap.h> +#include <UCDefs.h> +#include <UCAux.h> + +#include <LYCharSets.h> +#include <LYStrings.h> +#include <LYLeaks.h> + +static int HTPlain_lastraw = -1; +static int HTPlain_bs_pending = 0; /* 1:bs 2:underline 3:underline+bs - kw */ + +/* HTML Object + * ----------- + */ +struct _HTStream { + const HTStreamClass *isa; + HText *text; + /* + * The node_anchor UCInfo and handle for the input (PARSER) stage. - FM + */ + LYUCcharset *inUCI; + int inUCLYhndl; + /* + * The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM + */ + LYUCcharset *outUCI; + int outUCLYhndl; + /* + * Counter, value, buffer and pointer for UTF-8 handling. - FM + */ + char utf_count; + UCode_t utf_char; + char utf_buf[8]; + char *utf_buf_p; + /* + * The charset transformation structure. 
- FM + */ + UCTransParams T; +}; + +static char replace_buf[64]; /* buffer for replacement strings */ + +static void HTPlain_getChartransInfo(HTStream *me, HTParentAnchor *anchor) +{ + if (me->inUCLYhndl < 0) { + HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME, + UCT_SETBY_PARSER); + me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); + } + if (me->outUCLYhndl < 0) { + int chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); + + if (chndl < 0) { + chndl = current_char_set; + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); + } + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT); + me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); + } + me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER); + me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT); +} + +/* Write the buffer out to the socket + * ---------------------------------- + */ + +/*_________________________________________________________________________ + * + * A C T I O N R O U T I N E S + */ + +static void HTPlain_write(HTStream *me, const char *s, + int l); + +/* Character handling + * ------------------ + */ +static void HTPlain_put_character(HTStream *me, int c) +{ +#ifdef REMOVE_CR_ONLY + /* + * Throw away \r's. + */ + if (c != '\r') { + HText_appendCharacter(me->text, c); + } +#else + /* + * See HTPlain_write() for explanations of the following code (we've been + * called via HTPlain_put_string() to do for each character of a terminated + * string what HTPlain_write() does via a while loop for each character in + * a stream of given length). 
- FM + */ + if ((HTPlain_lastraw == '\r') && c == '\n') { + HTPlain_lastraw = -1; + return; + } + if (c == '\b' || c == '_' || HTPlain_bs_pending) { + char temp[1]; + + temp[0] = (char) c; + HTPlain_write(me, temp, 1); + return; + } + HTPlain_lastraw = UCH(c); + if (c == '\r') { + HText_appendCharacter(me->text, '\n'); + } else if (TOASCII(UCH(c)) >= 127) { /* S/390 -- gil -- 0305 */ + char temp[1]; + + temp[0] = (char) c; + /* + * For now, don't repeat everything here that has been done below - KW + */ + HTPlain_write(me, temp, 1); + } else if (IS_CJK_TTY) { + HText_appendCharacter(me->text, c); + } else if (TOASCII(UCH(c)) >= 127 && TOASCII(UCH(c)) < 161 && + HTPassHighCtrlRaw) { + HText_appendCharacter(me->text, c); + } else if (UCH(c) == CH_NBSP) { /* S/390 -- gil -- 0341 */ + HText_appendCharacter(me->text, ' '); + } else if (UCH(c) == CH_SHY) { + return; + } else if ((UCH(c) >= ' ' && TOASCII(UCH(c)) < 127) || + c == '\n' || c == '\t') { + HText_appendCharacter(me->text, c); + } else if (TOASCII(UCH(c)) > 160) { + if (!HTPassEightBitRaw && + !((me->outUCLYhndl == LATIN1) || + (me->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) { + int len, high, low, i, diff = 1; + const char *name; + UCode_t value = (UCode_t) FROMASCII((TOASCII(UCH(c)) - 160)); + + name = HTMLGetEntityName(value); + len = (int) strlen(name); + for (low = 0, high = (int) HTML_dtd.number_of_entities; + high > low; + diff < 0 ? 
(low = i + 1) : (high = i)) { + /* Binary search */ + i = (low + (high - low) / 2); + diff = AS_ncmp(HTML_dtd.entity_names[i], name, (unsigned) len); + if (diff == 0) { + HText_appendText(me->text, + LYCharSets[me->outUCLYhndl][i]); + break; + } + } + if (diff) { + HText_appendCharacter(me->text, c); + } + } else { + HText_appendCharacter(me->text, c); + } + } +#endif /* REMOVE_CR_ONLY */ +} + +/* String handling + * --------------- + * + */ +static void HTPlain_put_string(HTStream *me, const char *s) +{ +#ifdef REMOVE_CR_ONLY + HText_appendText(me->text, s); +#else + const char *p; + + if (s == NULL) + return; + for (p = s; *p; p++) { + HTPlain_put_character(me, *p); + } +#endif /* REMOVE_CR_ONLY */ +} + +/* + * Entry function for displayed text/plain and WWW_SOURCE strings. - FM + * --------------------------------------------------------------- + */ +static void HTPlain_write(HTStream *me, const char *s, int l) +{ + const char *p; + const char *e = s + l; + char c; + unsigned c_unsign; + BOOL chk; + UCode_t code, uck = -1; + char saved_char_in = '\0'; + + for (p = s; p < e; p++) { +#ifdef REMOVE_CR_ONLY + /* + * Append the whole string, but remove any \r's. 
- FM + */ + if (*p != '\r') { + HText_appendCharacter(me->text, *p); + } +#else + if (*p == '\b') { + if (HTPlain_lastraw >= UCH(' ') && + HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') { + if (!HTPlain_bs_pending) { + HTPlain_bs_pending = 1; + continue; + } else if (HTPlain_bs_pending == 2) { + HTPlain_bs_pending = 3; + continue; + } + } + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); + HTPlain_bs_pending = 0; + } else if (*p == '_') { + if (!HTPlain_bs_pending) { + HTPlain_bs_pending = 2; + HTPlain_lastraw = UCH(*p); + continue; +#if 0 + } else if (HTPlain_bs_pending != 2) { + HTPlain_bs_pending--; /* 1 -> 0, 3 -> 2 */ + HTPlain_lastraw = UCH(*p); + continue; +#endif + } + } + + /* + * Try to handle lone LFs, CRLFs and lone CRs as newline, and to deal + * with control, ASCII, and 8-bit characters based on best guesses of + * what's appropriate. - FM + */ + if ((HTPlain_lastraw == '\r') && *p == '\n') { + HTPlain_lastraw = -1; + continue; + } + + if (HTPlain_bs_pending && + !(UCH(*p) >= ' ' && *p != '\r' && *p != '\n' && + (HTPlain_lastraw == UCH(*p) || + HTPlain_lastraw == UCH('_') || + *p == '_'))) { + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); + HTPlain_bs_pending = 0; + } else if (HTPlain_bs_pending == 1) { + HTPlain_bs_pending = 0; + continue; /* ignore last two of "X\bX" or "X\b_" - kw */ + } else if (HTPlain_bs_pending == 3) { + if (*p == '_') { + HTPlain_bs_pending = 2; + continue; /* ignore last two of "_\b_" - kw */ + } else { + HTPlain_bs_pending = 0; + /* ignore first two of "_\bX" - kw */ + } + } else if (HTPlain_bs_pending == 2) { + HText_appendCharacter(me->text, '_'); + if (*p == '_') + continue; /* keep second of "__" pending - kw */ + HTPlain_bs_pending = 0; + } else { + HTPlain_bs_pending = 0; + } + HTPlain_lastraw = UCH(*p); + if (*p == '\r') { + HText_appendCharacter(me->text, '\n'); + continue; + } + /* + * Make sure the character is handled as Unicode whenever that's + * appropriate. 
- FM + */ + c = *p; + c_unsign = UCH(c); + code = (UCode_t) c_unsign; + saved_char_in = '\0'; + /* + * Combine any UTF-8 multibytes into Unicode to check for special + * characters. - FM + */ + if (me->T.decode_utf8) { + /* + * Combine UTF-8 into Unicode. Incomplete characters silently + * ignored. from Linux kernel's console.c - KW + */ + if (TOASCII(c_unsign) > 127) { /* S/390 -- gil -- 0371 */ + /* + * We have an octet from a multibyte character. - FM + */ + if (me->utf_count > 0 && (c & 0xc0) == 0x80) { + /* + * Adjust the UCode_t value, add the octet to the buffer, + * and decrement the byte count. - FM + */ + me->utf_char = (me->utf_char << 6) | (c & 0x3f); + me->utf_count--; + *(me->utf_buf_p) = c; + (me->utf_buf_p)++; + if (me->utf_count == 0) { + /* + * Got a complete multibyte character. + */ + *(me->utf_buf_p) = '\0'; + code = me->utf_char; + if (code > 0 && code < 256) { + c = FROMASCII((char) code); + c_unsign = UCH(c); + } + } else { + /* + * Get the next byte. - FM + */ + continue; + } + } else { + /* + * Start handling a new multibyte character. - FM + */ + me->utf_buf_p[0] = c; + me->utf_buf_p = &me->utf_buf[1]; + if ((*p & 0xe0) == 0xc0) { + me->utf_count = 1; + me->utf_char = (c & 0x1f); + } else if ((*p & 0xf0) == 0xe0) { + me->utf_count = 2; + me->utf_char = (c & 0x0f); + } else if ((*p & 0xf8) == 0xf0) { + me->utf_count = 3; + me->utf_char = (c & 0x07); + } else if ((*p & 0xfc) == 0xf8) { + me->utf_count = 4; + me->utf_char = (c & 0x03); + } else if ((*p & 0xfe) == 0xfc) { + me->utf_count = 5; + me->utf_char = (c & 0x01); + } else { + /* + * We got garbage, so ignore it. - FM + */ + me->utf_count = 0; + me->utf_buf_p[0] = '\0'; + me->utf_buf_p = me->utf_buf; + } + /* + * Get the next byte. - FM + */ + continue; + } + } else if (me->utf_count > 0) { + /* + * Got an ASCII character when expecting UTF-8 multibytes, so + * ignore the buffered multibye characters and fall through + * with the current ASCII character. 
- FM + */ + me->utf_count = 0; + me->utf_buf[0] = '\0'; + me->utf_buf_p = me->utf_buf; + code = (UCode_t) c_unsign; + } else { + /* + * Got a valid ASCII character, so fall through with it. - FM + */ + code = (UCode_t) c_unsign; + } + } + /* + * Convert characters from non-UTF-8 charsets to Unicode (if + * appropriate). - FM + */ + if (!(me->T.decode_utf8 && + UCH(*p) > 127)) { +#ifdef NOTDEFINED + if (me->T.strip_raw_char_in) + saved_char_in = c; +#endif /* NOTDEFINED */ + if (me->T.trans_to_uni && + (TOASCII(code) >= LYlowest_eightbit[me->inUCLYhndl] || /* S/390 -- gil -- 0389 */ + (code < ' ' && code != 0 && + me->T.trans_C0_to_uni))) { + /* + * Convert the octet to Unicode. - FM + */ + code = (UCode_t) UCTransToUni(c, me->inUCLYhndl); + if (code > 0) { + saved_char_in = c; + if (code < 256) { + c = FROMASCII((char) code); + c_unsign = UCH(c); + } + } + } else if (code < 32 && code != 0 && + me->T.trans_C0_to_uni) { + /* + * Quote from SGML.c: + * "This else if may be too ugly to keep. - KW" + */ + if (me->T.trans_from_uni && + (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) || + (me->T.transp && + (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) { + saved_char_in = c; + if (code < 256) { + c = FROMASCII((char) code); + c_unsign = UCH(c); + } + } else { + uck = -1; + if (me->T.transp) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->inUCLYhndl, NO); + } + if (!me->T.transp || uck < 0) { + uck = UCTransCharStr(replace_buf, 60, c, + me->inUCLYhndl, + me->outUCLYhndl, YES); + } + if (uck == 0) { + continue; + } else if (uck < 0) { + me->utf_buf[0] = '\0'; + } else { + c = replace_buf[0]; + if (c && replace_buf[1]) { + HText_appendText(me->text, replace_buf); + continue; + } + } + me->utf_buf[0] = '\0'; + code = UCH(c); + } /* Next line end of ugly stuff for C0. 
- KW */ + } else { + me->utf_buf[0] = '\0'; + code = UCH(c); + } + } + /* + * At this point we have either code in Unicode (and c in latin1 if + * code is in the latin1 range), or code and c will have to be passed + * raw. + */ + + /* + * If CJK mode is on, we'll assume the document matches the user's + * display character set, and if not, the user should toggle off + * raw/CJK mode to reload. - FM + */ + if (IS_CJK_TTY) { + HText_appendCharacter(me->text, c); + +#define PASSHICTRL (me->T.transp || \ + code >= LYlowest_eightbit[me->inUCLYhndl]) +#define PASS8859SPECL me->T.pass_160_173_raw +#define PASSHI8BIT (HTPassEightBitRaw || \ + (me->T.do_8bitraw && !me->T.trans_from_uni)) + /* + * If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the + * document matches and pass 127-160 8-bit characters. If it + * doesn't match, the user should toggle raw/CJK mode off. - FM + */ + } else if (TOASCII(code) >= 127 && TOASCII(code) < 161 && /* S/390 -- gil -- 0427 */ + PASSHICTRL && PASS8859SPECL) { + HText_appendCharacter(me->text, c); + } else if (code == CH_SHY && PASS8859SPECL) { + HText_appendCharacter(me->text, c); + /* + * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and + * treat 160 (nbsp) as an ASCII space (32). - FM + */ + } else if (code == CH_NBSP) { + HText_appendCharacter(me->text, ' '); + /* + * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and + * ignore 173 (shy). - FM + * Now only ignore it for color style, which doesn't handle it + * anyway. Otherwise pass it on as LY_SOFT_HYPHEN and let HText + * deal with it. It should be either ignored, or displayed as a + * hyphen if it was indeed at the end of a line. Well it should. + * - kw + */ + } else if (code == CH_SHY) { +#ifndef USE_COLOR_STYLE + HText_appendCharacter(me->text, LY_SOFT_HYPHEN); +#endif + continue; + /* + * If we get to here, pass the displayable ASCII characters. 
- FM + */ + } else if ((code >= ' ' && TOASCII(code) < 127) || + (PASSHI8BIT && + c >= LYlowest_eightbit[me->outUCLYhndl]) || + *p == '\n' || *p == '\t') { + HText_appendCharacter(me->text, c); + /* + * Use an ASCII space (32) for ensp, emsp or thinsp. - FM + */ + } else if (code == 8194 || code == 8195 || code == 8201) { + HText_appendCharacter(me->text, ' '); + /* + * If we want the raw character, pass it now. - FM + */ + } else if (me->T.use_raw_char_in && saved_char_in) { + HText_appendCharacter(me->text, saved_char_in); +/****************************************************************** + * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET + ******************************************************************/ + } else if ((chk = (BOOL) (me->T.trans_from_uni && code >= 160)) && + (uck = UCTransUniChar(code, + me->outUCLYhndl)) >= ' ' && /* S/390 -- gil -- 0464 */ + uck < 256) { + CTRACE((tfp, "UCTransUniChar returned 0x%.2" PRI_UCode_t + ":'%c'.\n", + uck, FROMASCII(UCH(uck)))); + HText_appendCharacter(me->text, ((char) (uck & 0xff))); + } else if (chk && + (uck == -4 || + (me->T.repl_translated_C0 && uck > 0 && uck < ' ')) && /* S/390 -- gil -- 0481 */ + /* + * Not found; look for replacement string. + */ + (uck = UCTransUniCharStr(replace_buf, 60, code, + me->outUCLYhndl, 0) >= 0)) { + /* + * No further tests for valididy - assume that whoever defined + * replacement strings knew what she was doing. + */ + HText_appendText(me->text, replace_buf); + /* + * If we get to here, and should have translated, translation has + * failed so far. + */ + } else if (chk && TOASCII(code) > 127 && me->T.output_utf8) { /* S/390 -- gil -- 0498 */ + /* + * We want UTF-8 output, so do it now. - FM + */ + if (*me->utf_buf) { + HText_appendText(me->text, me->utf_buf); + me->utf_buf[0] = '\0'; + me->utf_buf_p = me->utf_buf; + } else if (UCConvertUniToUtf8(code, replace_buf)) { + HText_appendText(me->text, replace_buf); + } else { + /* + * Out of luck, so use the UHHH notation (ugh). 
- gil + */ + /* S/390 -- gil -- 0517 */ + sprintf(replace_buf, "U%.2lX", (unsigned long) TOASCII(code)); + HText_appendText(me->text, replace_buf); + } +#ifdef NOTDEFINED + } else if (me->T.strip_raw_char_in && + UCH(*p) >= 192 && + UCH(*p) < 255) { + /* + * KOI special: strip high bit, gives (somewhat) readable ASCII. + */ + HText_appendCharacter(me->text, (char) (*p & 0x7f)); +#endif /* NOTDEFINED */ + /* + * If we don't actually want the character, make it safe and output + * that now. - FM + */ + } else if ((c_unsign > 0 && + (int) c_unsign < LYlowest_eightbit[me->outUCLYhndl]) || + (me->T.trans_from_uni && !HTPassEightBitRaw)) { + /* + * If we do not have the "7-bit approximations" as our output + * character set (in which case we did it already) seek a + * translation for that. Otherwise, or if the translation fails, + * use UHHH notation. - FM + */ + if ((chk = (BOOL) (me->outUCLYhndl != + UCGetLYhndl_byMIME("us-ascii"))) && + (uck = UCTransUniChar(code, + UCGetLYhndl_byMIME("us-ascii"))) + >= ' ' && TOASCII(uck) < 127) { /* S/390 -- gil -- 0535 */ + /* + * Got an ASCII character (yippey). - FM + */ + c = FROMASCII((char) uck); + HText_appendCharacter(me->text, c); + } else if ((chk && uck == -4) && + (uck = UCTransUniCharStr(replace_buf, + 60, code, + UCGetLYhndl_byMIME("us-ascii"), + 0) >= 0)) { + /* + * Got a repacement string (yippey). - FM + */ + HText_appendText(me->text, replace_buf); + } else if (code == 8204 || code == 8205) { + /* + * Ignore 8204 (zwnj) or 8205 (zwj), if we get to here. - FM + */ + CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code)); + } else if (code == 8206 || code == 8207) { + /* + * Ignore 8206 (lrm) or 8207 (rlm), if we get to here. - FM + */ + CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code)); + } else { + /* + * Out of luck, so use the UHHH notation (ugh). 
- FM + */ + /* do not print UHHH for now + sprintf(replace_buf, "U%.2lX", code); + HText_appendText(me->text, replace_buf); + */ + } + /* + * If we get to here and have a monobyte character, pass it. - FM + */ + } else if (c_unsign != 0 && c_unsign < 256) { + HText_appendCharacter(me->text, c); + } +#endif /* REMOVE_CR_ONLY */ + } +} + +/* Free an HTML object + * ------------------- + * + * Note that the SGML parsing context is freed, but the created object is + * not, as it takes on an existence of its own unless explicitly freed. + */ +static void HTPlain_free(HTStream *me) +{ + if (HTPlain_bs_pending >= 2) + HText_appendCharacter(me->text, '_'); + FREE(me); +} + +/* End writing +*/ +static void HTPlain_abort(HTStream *me, HTError e GCC_UNUSED) +{ + HTPlain_free(me); +} + +/* Structured Object Class + * ----------------------- + */ +static const HTStreamClass HTPlain = +{ + "PlainPresenter", + HTPlain_free, + HTPlain_abort, + HTPlain_put_character, HTPlain_put_string, HTPlain_write, +}; + +/* New object + * ---------- + */ +HTStream *HTPlainPresent(HTPresentation *pres GCC_UNUSED, HTParentAnchor *anchor, + HTStream *sink GCC_UNUSED) +{ + + HTStream *me = (HTStream *) malloc(sizeof(*me)); + + if (me == NULL) + outofmem(__FILE__, "HTPlain_new"); + + assert(me != NULL); + + me->isa = &HTPlain; + + HTPlain_lastraw = -1; + + me->utf_count = 0; + me->utf_char = 0; + me->utf_buf[0] = me->utf_buf[6] = me->utf_buf[7] = '\0'; + me->utf_buf_p = me->utf_buf; + me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); + me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER); + HTPlain_getChartransInfo(me, anchor); + UCSetTransParams(&me->T, + me->inUCLYhndl, me->inUCI, + me->outUCLYhndl, + HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT)); + + me->text = HText_new(anchor); + HText_setStyle(me->text, LYstyles(HTML_XMP)); + HText_beginAppend(me->text); + + return (HTStream *) me; +} diff --git a/WWW/Library/Implementation/HTPlain.h 
b/WWW/Library/Implementation/HTPlain.h new file mode 100644 index 00000000..24fd6691 --- /dev/null +++ b/WWW/Library/Implementation/HTPlain.h @@ -0,0 +1,21 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTPlain.html + PLAIN TEXT OBJECT + HTPlainPresent() creates the stream that presents text/plain data for an anchor; it is implemented in HTPlain.c. + */ +#ifndef HTPLAIN_H +#define HTPLAIN_H + +#include <HTStream.h> +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern HTStream *HTPlainPresent(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +#ifdef __cplusplus +} +#endif +#endif /* HTPLAIN_H */ diff --git a/WWW/Library/Implementation/HTRules.c b/WWW/Library/Implementation/HTRules.c new file mode 100644 index 00000000..35b36719 --- /dev/null +++ b/WWW/Library/Implementation/HTRules.c @@ -0,0 +1,705 @@ +/* + * $LynxId: HTRules.c,v 1.42 2010/06/17 21:33:35 tom Exp $ + * + * Configuration manager for Hypertext Daemon HTRules.c + * ========================================== + * + * + * History: + * 3 Jun 91 Written TBL + * 10 Aug 91 Authorisation added after Daniel Martin (pass, fail) + * Rule order in file changed + * Comments allowed with # on 1st char of rule line + * 17 Jun 92 Bug fix: pass and fail failed if didn't contain '*' TBL + * 1 Sep 93 Bug fix: no memory check - Nathan Torkington + * BYTE_ADDRESSING removed - Arthur Secret + * 11 Sep 93 MD Changed %i into %d in debug printf. + * VMS does not recognize %i. + * Bug Fix: in case of PASS, only one parameter to printf. + * 19 Sep 93 AL Added Access Authorization stuff. + * 1 Nov 93 AL Added htbin. + * 25 May 99 KW Added redirect for lynx. + * + */ + +#include <HTUtils.h> + +/* (c) CERN WorldWideWeb project 1990,91.
See Copyright.html for details */ +#include <HTRules.h> + +#include <HTFile.h> +#include <LYLeaks.h> +#include <HTAAProt.h> + +#define LINE_LENGTH 256 + +typedef struct _rule { + struct _rule *next; /* next rule in the list, or 0 */ + HTRuleOp op; /* operation performed when the pattern matches */ + char *pattern; /* heap copy of the pattern, may contain a "*" wildcard */ + char *equiv; /* heap copy of the equivalent string, or 0 if none given */ + char *condition_op; /* as strings - may be inefficient, */ + char *condition; /* but this is not for a server - kw */ +} rule; + +#ifndef NO_RULES + +#include <HTTP.h> /* for redirecting_url, indirectly HTPermitRedir - kw */ +#include <LYGlobalDefs.h> /* for LYUserSpecifiedURL - kw */ +#include <LYStrings.h> /* for LYscanFloat */ +#include <LYUtils.h> /* for LYFixCursesOn - kw */ +#include <HTAlert.h> + +/* Global variables + * ---------------- + */ +char *HTBinDir = NULL; /* Physical /htbin directory path. */ + + /* In future this should not be global. */ +char *HTSearchScript = NULL; /* Search script name. */ + +/* Module-wide variables + * --------------------- + */ + +static rule *rules = 0; /* Pointer to first on list */ + +#ifndef PUT_ON_HEAD +static rule *rule_tail = 0; /* Pointer to last on list */ +#endif + +/* Add rule to the list HTAddRule() + * -------------------- + * + * On entry, + * pattern points to 0-terminated string containing a single "*" + * equiv points to the equivalent string with * for the + * place where the text matched by * goes. + * On exit, + * returns 0 if success, -1 if error.
+ */ + +int HTAddRule(HTRuleOp op, const char *pattern, + const char *equiv, + const char *cond_op, + const char *cond) +{ /* BYTE_ADDRESSING removed and memory check - AS - 1 Sep 93 */ + rule *temp; + char *pPattern = NULL; + + temp = typecalloc(rule); + if (temp == NULL) + outofmem(__FILE__, "HTAddRule"); + + assert(temp != NULL); + + if (equiv) { /* Two operands */ + char *pEquiv = NULL; + + StrAllocCopy(pEquiv, equiv); + temp->equiv = pEquiv; + } else { + temp->equiv = 0; + } + if (cond_op) { + StrAllocCopy(temp->condition_op, cond_op); + StrAllocCopy(temp->condition, cond); + } + StrAllocCopy(pPattern, pattern); + temp->pattern = pPattern; + temp->op = op; + + if (equiv) { + CTRACE((tfp, "Rule: For `%s' op %d `%s'", pattern, (int) op, equiv)); + } else { + CTRACE((tfp, "Rule: For `%s' op %d", pattern, (int) op)); + } + if (cond_op) { + CTRACE((tfp, "\t%s %s\n", cond_op, NONNULL(cond))); + } else { + CTRACE((tfp, "\n")); + } + + if (!rules) { +#ifdef LY_FIND_LEAKS + atexit(HTClearRules); +#endif + } +#ifdef PUT_ON_HEAD + temp->next = rules; + rules = temp; +#else + temp->next = 0; + if (rule_tail) + rule_tail->next = temp; + else + rules = temp; + rule_tail = temp; +#endif + + return 0; +} + +/* Clear all rules HTClearRules() + * --------------- + * + * On exit, + * There are no rules + * + * See also + * HTAddRule() + */ +void HTClearRules(void) +{ + while (rules) { + rule *temp = rules; + + rules = temp->next; + FREE(temp->pattern); + FREE(temp->equiv); + FREE(temp->condition_op); + FREE(temp->condition); + FREE(temp); + } +#ifndef PUT_ON_HEAD + rule_tail = 0; +#endif +} + +static BOOL rule_cond_ok(rule * r) +{ + BOOL result; + + if (!r->condition_op) + return YES; + if (strcmp(r->condition_op, "if") && strcmp(r->condition_op, "unless")) { + CTRACE((tfp, "....... 
rule ignored, unrecognized `%s'!\n", + r->condition_op)); + return NO; + } + if (!strcmp(r->condition, "redirected")) + result = (BOOL) (redirection_attempts > 0); + else if (!strcmp(r->condition, "userspec")) + result = LYUserSpecifiedURL; + else { + CTRACE((tfp, "....... rule ignored, unrecognized `%s %s'!\n", + r->condition_op, NONNULL(r->condition))); + return NO; + } + if (!strcmp(r->condition_op, "if")) + return result; + else + return (BOOL) (!result); + +} + +/* Translate by rules HTTranslate() + * ------------------ + * + * The most recently defined rules are applied first. + * + * On entry, + * required points to a string whose equivalent value is needed + * On exit, + * returns the address of the equivalent string allocated from + * the heap which the CALLER MUST FREE. If no translation + * occurred, then it is a copy of the original. + * NEW FEATURES: + * When a "protect" or "defprot" rule is matched, + * a call to HTAA_setCurrentProtection() or + * HTAA_setDefaultProtection() is made to notify + * the Access Authorization module that the file is + * protected, and so it knows how to handle it. 
+ * -- AL + */ +char *HTTranslate(const char *required) +{ + rule *r; + char *current = NULL; + char *msgtmp = NULL; + const char *pMsg; + int proxy_none_flag = 0; + int permitredir_flag = 0; + + StrAllocCopy(current, required); + + HTAA_clearProtections(); /* Reset from previous call -- AL */ + + for (r = rules; r; r = r->next) { + char *p = r->pattern; + int m = 0; /* Number of characters matched against wildcard */ + const char *q = current; + + for (; *p && *q; p++, q++) { /* Find first mismatch */ + if (*p != *q) + break; + } + + if (*p == '*') { /* Match up to wildcard */ + m = (int) strlen(q) - (int) strlen(p + 1); /* Amount to match to wildcard */ + if (m < 0) + continue; /* tail is too short to match */ + if (0 != strcmp(q + m, p + 1)) + continue; /* Tail mismatch */ + } else + /* Not wildcard */ if (*p != *q) + continue; /* plain mismatch: go to next rule */ + + if (!rule_cond_ok(r)) /* check condition, next rule if false - kw */ + continue; + + switch (r->op) { /* Perform operation */ + + case HT_DefProt: + case HT_Protect: +#ifdef ACCESS_AUTH + { + char *local_copy = NULL; + char *p2; + char *eff_ids = NULL; + char *prot_file = NULL; + + CTRACE((tfp, "HTRule: `%s' matched %s %s: `%s'\n", + current, + (r->op == HT_Protect ? "Protect" : "DefProt"), + "rule, setup", + (r->equiv ? r->equiv : + (r->op == HT_Protect ? "DEFAULT" : "NULL!!")))); + + if (r->equiv) { + StrAllocCopy(local_copy, r->equiv); + p2 = local_copy; + prot_file = HTNextField(&p2); + eff_ids = HTNextField(&p2); + } + + if (r->op == HT_Protect) + HTAA_setCurrentProtection(current, prot_file, eff_ids); + else + HTAA_setDefaultProtection(current, prot_file, eff_ids); + + FREE(local_copy); + + /* continue translating rules */ + } +#endif /* ACCESS_AUTH */ + break; + + case HT_UserMsg: /* Produce message immediately */ + LYFixCursesOn("show rule message:"); + HTUserMsg2((r->equiv ? 
r->equiv : "Rule: %s"), current); + break; + case HT_InfoMsg: /* Produce messages immediately */ + case HT_Progress: + case HT_Alert: + LYFixCursesOn("show rule message:"); /* and fall through */ + case HT_AlwaysAlert: + pMsg = r->equiv ? r->equiv : + (r->op == HT_AlwaysAlert) ? "%s" : "Rule: %s"; + if (strchr(pMsg, '%')) { + HTSprintf0(&msgtmp, pMsg, current); + pMsg = msgtmp; + } + switch (r->op) { /* Actually produce message */ + case HT_InfoMsg: + HTInfoMsg(pMsg); + break; + case HT_Progress: + HTProgress(pMsg); + break; + case HT_Alert: + HTAlert(pMsg); + break; + case HT_AlwaysAlert: + HTAlwaysAlert("Rule alert:", pMsg); + break; + default: + break; + } + FREE(msgtmp); + break; + + case HT_PermitRedir: /* Set special flag */ + permitredir_flag = 1; + CTRACE((tfp, "HTRule: Mark for redirection permitted\n")); + break; + + case HT_Pass: /* Authorised */ + if (!r->equiv) { + if (proxy_none_flag) { + char *temp = NULL; + + StrAllocCopy(temp, "NoProxy="); + StrAllocCat(temp, current); + FREE(current); + current = temp; + } + CTRACE((tfp, "HTRule: Pass `%s'\n", current)); + return current; + } + /* Else fall through ...to map and pass */ + + case HT_Map: + case HT_Redirect: + case HT_RedirectPerm: + if (*p == *q) { /* End of both strings, no wildcard */ + CTRACE((tfp, "For `%s' using `%s'\n", current, r->equiv)); + StrAllocCopy(current, r->equiv); /* use entire translation */ + } else { + char *ins = strchr(r->equiv, '*'); /* Insertion point */ + + if (ins) { /* Consistent rule!!! 
*/ + char *temp = NULL; + + HTSprintf0(&temp, "%.*s%.*s%s", + (int) (ins - r->equiv), + r->equiv, + m, + q, + ins + 1); + CTRACE((tfp, "For `%s' using `%s'\n", + current, temp)); + FREE(current); + current = temp; /* Use this */ + + } else { /* No insertion point */ + char *temp = NULL; + + StrAllocCopy(temp, r->equiv); + CTRACE((tfp, "For `%s' using `%s'\n", + current, temp)); + FREE(current); + current = temp; /* Use this */ + } /* If no insertion point exists */ + } + if (r->op == HT_Pass) { + if (proxy_none_flag) { + char *temp = NULL; + + StrAllocCopy(temp, "NoProxy="); + StrAllocCat(temp, current); + FREE(current); + current = temp; + } + CTRACE((tfp, "HTRule: ...and pass `%s'\n", + current)); + return current; + } else if (r->op == HT_Redirect) { + CTRACE((tfp, "HTRule: ...and redirect to `%s'\n", + current)); + redirecting_url = current; + HTPermitRedir = (BOOL) (permitredir_flag == 1); + return (char *) 0; + } else if (r->op == HT_RedirectPerm) { + CTRACE((tfp, "HTRule: ...and redirect like 301 to `%s'\n", + current)); + redirecting_url = current; + permanent_redirection = TRUE; + HTPermitRedir = (BOOL) (permitredir_flag == 1); + return (char *) 0; + } + break; + + case HT_UseProxy: + if (r->equiv && 0 == strcasecomp(r->equiv, "none")) { + CTRACE((tfp, "For `%s' will not use proxy\n", current)); + proxy_none_flag = 1; + } else if (proxy_none_flag) { + CTRACE((tfp, "For `%s' proxy server ignored: %s\n", + current, + NONNULL(r->equiv))); + } else { + char *temp = NULL; + + StrAllocCopy(temp, "Proxied="); + StrAllocCat(temp, r->equiv); + StrAllocCat(temp, current); + CTRACE((tfp, "HTRule: proxy server found: %s\n", + NONNULL(r->equiv))); + FREE(current); + return temp; + } + break; + + case HT_Invalid: + case HT_Fail: /* Unauthorised */ + CTRACE((tfp, "HTRule: *** FAIL `%s'\n", current)); + FREE(current); + return (char *) 0; + } /* if tail matches ... 
switch operation */ + + } /* loop over rules */ + + if (proxy_none_flag) { + char *temp = NULL; + + StrAllocCopy(temp, "NoProxy="); + StrAllocCat(temp, current); + FREE(current); + return temp; + } + + return current; +} + +/* Load one line of configuration + * ------------------------------ + * + * Call this, for example, to load a X resource with config info. + * + * returns 0 OK, < 0 syntax error. + */ +int HTSetConfiguration(char *config) +{ + HTRuleOp op; + char *line = NULL; + char *pointer = line; + char *word1; + const char *word2; + const char *word3; + const char *cond_op = NULL; + const char *cond = NULL; + float quality, secs, secs_per_byte; + long maxbytes; + int status; + + StrAllocCopy(line, config); + { + char *p = line; + + /* Chop off comments */ + while ((p = strchr(p, '#'))) { + if (p == line || isspace(UCH(*(p - 1)))) { + *p = 0; + break; + } else { + p++; + } + } + } + pointer = line; + word1 = HTNextField(&pointer); + if (!word1) { + FREE(line); + return 0; + }; /* Comment only or blank */ + + word2 = HTNextField(&pointer); + + if (0 == strcasecomp(word1, "defprot") || + 0 == strcasecomp(word1, "protect")) + word3 = pointer; /* The rest of the line to be parsed by AA module */ + else + word3 = HTNextField(&pointer); /* Just the next word */ + + if (!word2) { + fprintf(stderr, "HTRule: %s %s\n", RULE_NEEDS_DATA, line); + FREE(line); + return -2; /*syntax error */ + } + + if (0 == strcasecomp(word1, "suffix")) { + char *encoding = HTNextField(&pointer); + + status = 0; + if (pointer) + status = LYscanFloat(pointer, &quality); + + HTSetSuffix(word2, word3, + encoding ? encoding : "binary", + status >= 1 ? 
quality : (float) 1.0); + + } else if (0 == strcasecomp(word1, "presentation")) { + status = 0; + if (pointer) { + const char *temp = pointer; + + if (LYscanFloat2(&temp, &quality)) { + status = 1; + if (LYscanFloat2(&temp, &secs)) { + status = 2; + if (LYscanFloat2(&temp, &secs_per_byte)) { + status = 3; + if (sscanf(temp, "%ld", &maxbytes)) { + status = 4; + } + } + } + } + } + + HTSetPresentation(word2, word3, NULL, + status >= 1 ? quality : 1.0, + status >= 2 ? secs : 0.0, + status >= 3 ? secs_per_byte : 0.0, + status >= 4 ? maxbytes : 0, + mediaCFG); + + } else if (0 == strncasecomp(word1, "htbin", 5) || + 0 == strncasecomp(word1, "bindir", 6)) { + StrAllocCopy(HTBinDir, word2); /* Physical /htbin location */ + + } else if (0 == strncasecomp(word1, "search", 6)) { + StrAllocCopy(HTSearchScript, word2); /* Search script name */ + + } else { + op = 0 == strcasecomp(word1, "map") ? HT_Map + : 0 == strcasecomp(word1, "pass") ? HT_Pass + : 0 == strcasecomp(word1, "fail") ? HT_Fail + : 0 == strcasecomp(word1, "redirect") ? HT_Redirect + : 0 == strncasecomp(word1, "redirectperm", 12) ? HT_RedirectPerm + : 0 == strcasecomp(word1, "redirecttemp") ? HT_Redirect + : 0 == strcasecomp(word1, "permitredirection") ? HT_PermitRedir + : 0 == strcasecomp(word1, "useproxy") ? HT_UseProxy + : 0 == strcasecomp(word1, "alert") ? HT_Alert + : 0 == strcasecomp(word1, "alwaysalert") ? HT_AlwaysAlert + : 0 == strcasecomp(word1, "progress") ? HT_Progress + : 0 == strcasecomp(word1, "usermsg") ? HT_UserMsg + : 0 == strcasecomp(word1, "infomsg") ? HT_InfoMsg + : 0 == strcasecomp(word1, "defprot") ? HT_DefProt + : 0 == strcasecomp(word1, "protect") ? 
HT_Protect + : HT_Invalid; + if (op == HT_Invalid) { + fprintf(stderr, "HTRule: %s '%s'\n", RULE_INCORRECT, config); + } else { + switch (op) { + case HT_Fail: /* never a or other 2nd parameter */ + case HT_PermitRedir: + cond_op = word3; + if (cond_op && *cond_op) { + word3 = NULL; + cond = HTNextField(&pointer); + } + break; + + case HT_Pass: /* possibly a URL2 */ + if (word3 && (!strcasecomp(word3, "if") || + !strcasecomp(word3, "unless"))) { + cond_op = word3; + word3 = NULL; + cond = HTNextField(&pointer); + break; + } + /* else fall through */ + case HT_Map: /* always a URL2 (or other 2nd parameter) */ + case HT_Redirect: + case HT_RedirectPerm: + case HT_UseProxy: + cond_op = HTNextField(&pointer); + /* check for extra status word in "Redirect" */ + if (op == HT_Redirect && 0 == strcasecomp(word1, "redirect") && + cond_op && + strcasecomp(cond_op, "if") && + strcasecomp(cond_op, "unless")) { + if (0 == strcmp(word2, "301") || + 0 == strcasecomp(word2, "permanent")) { + op = HT_RedirectPerm; + } else if (!(0 == strcmp(word2, "302") || + 0 == strcmp(word2, "303") || + 0 == strcasecomp(word2, "temp") || + 0 == strcasecomp(word2, "seeother"))) { + CTRACE((tfp, "Rule: Ignoring `%s' in Redirect\n", word2)); + } + word2 = word3; + word3 = cond_op; /* cond_op isn't condition op after all */ + cond_op = HTNextField(&pointer); + } + if (cond_op && *cond_op) + cond = HTNextField(&pointer); + break; + + case HT_Progress: + case HT_InfoMsg: + case HT_UserMsg: + case HT_Alert: + case HT_AlwaysAlert: + cond_op = HTNextField(&pointer); + if (cond_op && *cond_op) + cond = HTNextField(&pointer); + if (word3) { /* Fix string with too may %s - kw */ + const char *cp = word3; + char *cp1, *cp2; + + while ((cp1 = strchr(cp, '%'))) { + if (cp1[1] == '\0') { + *cp1 = '\0'; + break; + } else if (cp1[1] == '%') { + cp = cp1 + 2; + continue; + } else + while ((cp2 = strchr(cp1 + 2, '%'))) { + if (cp2[1] == '\0') { + *cp2 = '\0'; + break; + } else if (cp2[1] == '%') { + cp1 = cp2; + } 
else { + *cp2 = '?'; /* replace bad % */ + cp1 = cp2; + } + } + break; + } + } + break; + + default: + break; + } + if (cond_op && cond && *cond && !strcasecomp(cond_op, "unless")) { + cond_op = "unless"; + } else if (cond_op && cond && *cond && + !strcasecomp(cond_op, "if")) { + cond_op = "if"; + } else if (cond_op || cond) { + fprintf(stderr, "HTRule: %s '%s'\n", RULE_INCORRECT, config); + FREE(line); /* syntax error, condition is a mess - kw */ + return -2; /* NB unrecognized cond passes here - kw */ + } + if (cond && !strncasecomp(cond, "redirected", (int) strlen(cond))) { + cond = "redirected"; /* recognized, canonical case - kw */ + } else if (cond && strlen(cond) >= 8 && + !strncasecomp(cond, "userspecified", (int) strlen(cond))) { + cond = "userspec"; /* also allow abbreviation - kw */ + } + HTAddRule(op, word2, word3, cond_op, cond); + } + } + FREE(line); + return 0; +} + +/* Load the rules from a file HTLoadRules() + * -------------------------- + * + * On entry, + * Rules can be in any state + * On exit, + * Any existing rules will have been kept. + * Any new rules will have been loaded. + * Returns 0 if no error, 0 if error! + * + * Bugs: + * The strings may not contain spaces. + */ + +int HTLoadRules(const char *filename) +{ + FILE *fp = fopen(filename, TXT_R); + char line[LINE_LENGTH + 1]; + + if (!fp) { + CTRACE((tfp, "HTRules: Can't open rules file %s\n", filename)); + return -1; /* File open error */ + } + for (;;) { + if (!fgets(line, LINE_LENGTH + 1, fp)) + break; /* EOF or error */ + (void) HTSetConfiguration(line); + } + fclose(fp); + return 0; /* No error or syntax errors ignored */ +} + +#endif /* NO_RULES */ diff --git a/WWW/Library/Implementation/HTRules.h b/WWW/Library/Implementation/HTRules.h new file mode 100644 index 00000000..fa5130e5 --- /dev/null +++ b/WWW/Library/Implementation/HTRules.h @@ -0,0 +1,169 @@ +/* Configuration Manager for libwww + * CONFIGURATION MANAGER + * + * Author Tim Berners-Lee/CERN. Public domain. 
Please mail changes to + * timbl@info.cern.ch. + * + * The configuration information loaded includes tables (file suffixes, + * presentation methods) in other modules. The most likely routines needed by + * developers will be: + * + * HTSetConfiguration to load configuration information. + * + * HTLoadRules to load a whole file of configuration information + * + * HTTranslate to translate a URL using the rule table. + * + */ +#ifndef HTRULE_H +#define HTRULE_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + typedef enum { + HT_Invalid, + HT_Map, + HT_Pass, + HT_Fail, + HT_DefProt, + HT_Protect, + HT_Progress, + HT_InfoMsg, + HT_UserMsg, + HT_Alert, + HT_AlwaysAlert, + HT_Redirect, + HT_RedirectPerm, + HT_PermitRedir, + HT_UseProxy + } HTRuleOp; + +#ifndef NO_RULES + +/* + +Server Side Script Execution + + If a URL starts with /htbin/ it is understood to mean a script execution request on + server. This feature needs to be turned on by setting HTBinDir by the htbin rule. + Index searching is enabled by setting HTSearchScript into the name of script in BinDir + doing the actual search by search rule (BinDir must also be set in this case, of + course). + + */ + + extern char *HTBinDir; /* Physical /htbin location */ + extern char *HTSearchScript; /* Search script name */ + +/* + +HTAddRule: Add rule to the list + + ON ENTRY, + + pattern points to 0-terminated string containing a single "*" + + equiv points to the equivalent string with * for the place where the + text matched by * goes; or to other 2nd parameter + meaning depends on op). + + cond_op, additional condition for applying rule; cond_op should + cond be either NULL (no additional condition), or one of + the strings "if" or "unless"; if cond_op is not NULL, + cond should point to a recognized condition keyword + (as a string) such as "userspec", "redirected". + + ON EXIT, + + returns 0 if success, -1 if error. 
+ + Note that if BYTE_ADDRESSING is set, the three blocks required are allocated and + deallocated as one. This will save time and storage, when malloc's allocation units are + large. + + */ + extern int HTAddRule(HTRuleOp op, const char *pattern, + const char *equiv, + const char *cond_op, + const char *cond); + +/* + +HTClearRules: Clear all rules + + ON EXIT, + + Rule file There are no rules + + */ + + extern void HTClearRules(void); + +/* + +HTTranslate: Translate by rules + + */ + +/* + + ON ENTRY, + + required points to a string whose equivalent value is neeed + + ON EXIT, + + returns the address of the equivalent string allocated from the heap + which the CALLER MUST FREE. If no translation occurred, then it is + a copy of the original. + + */ + extern char *HTTranslate(const char *required); + +/* + +HTSetConfiguration: Load one line of configuration information + + ON ENTRY, + + config is a string in the syntax of a rule file line. + + This routine may be used for loading configuration information from sources other than + the rule file, for example INI files for X resources. + + */ + extern int HTSetConfiguration(char *config); + +/* + +HtLoadRules: Load the rules from a file + + ON ENTRY, + + Rule table Rules can be in any state + + ON EXIT, + + Rule table Any existing rules will have been kept. Any new rules will have + been loaded on top, so as to be tried first. + + Returns 0 if no error. + + */ + + extern int HTLoadRules(const char *filename); + +/* + + */ + +#endif /* NO_RULES */ +#ifdef __cplusplus +} +#endif +#endif /* HTRULE_H */ diff --git a/WWW/Library/Implementation/HTStream.h b/WWW/Library/Implementation/HTStream.h new file mode 100644 index 00000000..a7532666 --- /dev/null +++ b/WWW/Library/Implementation/HTStream.h @@ -0,0 +1,69 @@ +/* + * $LynxId: HTStream.h,v 1.16 2011/06/11 12:08:40 tom Exp $ + * + * The Stream class definition -- libwww + STREAM OBJECT DEFINITION + + A Stream object is something which accepts a stream of text. 
+ + The creation methods will vary on the type of Stream Object. All creation + methods return a pointer to the stream type below. + + As you can see, but the methods used to write to the stream and close it are + pointed to be the object itself. + + */ +#ifndef HTSTREAM_H +#define HTSTREAM_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + typedef struct _HTStream HTStream; + +/* + + These are the common methods of all streams. They should be + self-explanatory. + + */ + typedef struct _HTStreamClass { + + const char *name; /* Just for diagnostics */ + + void (*_free) (HTStream *me); + + void (*_abort) (HTStream *me, HTError e); + + void (*put_character) (HTStream *me, int ch); + + void (*put_string) (HTStream *me, const char *str); + + void (*put_block) (HTStream *me, const char *str, int len); + + } HTStreamClass; + +#ifndef HTSTREAM_INTERNAL + struct _HTStream { + HTStreamClass *isa; + }; +#endif +/* + + Generic Error Stream + + The Error stream simply signals an error on all output methods. + This can be used to stop a stream as soon as data arrives, for + example from the network. + + */ + extern HTStream *HTErrorStream(void); + +#ifdef __cplusplus +} +#endif +#endif /* HTSTREAM_H */ diff --git a/WWW/Library/Implementation/HTString.c b/WWW/Library/Implementation/HTString.c new file mode 100644 index 00000000..e22c937e --- /dev/null +++ b/WWW/Library/Implementation/HTString.c @@ -0,0 +1,1403 @@ +/* + * $LynxId: HTString.c,v 1.69 2012/02/09 22:02:21 tom Exp $ + * + * Case-independent string comparison HTString.c + * + * Original version came with listserv implementation. + * Version TBL Oct 91 replaces one which modified the strings. + * 02-Dec-91 (JFG) Added stralloccopy and stralloccat + * 23 Jan 92 (TBL) Changed strallocc* to 8 char HTSAC* for VM and suchlike + * 6 Oct 92 (TBL) Moved WWW_TraceFlag in here to be in library + * 15 Nov 98 (TD) Added HTSprintf. 
+ */ + +#include <HTUtils.h> + +#include <LYLeaks.h> +#include <LYUtils.h> +#include <LYStrings.h> + +#ifdef USE_IGNORE_RC +int ignore_unused; +#endif + +#ifndef NO_LYNX_TRACE +BOOLEAN WWW_TraceFlag = 0; /* Global trace flag for ALL W3 code */ +int WWW_TraceMask = 0; /* Global trace flag for ALL W3 code */ +#endif + +#ifdef _WINDOWS +#undef VC +#define VC "2.14FM" +#endif + +#ifndef VC +#define VC "2.14" +#endif /* !VC */ + +const char *HTLibraryVersion = VC; /* String for help screen etc */ + +/* + * strcasecomp8 is a variant of strcasecomp (below) + * ------------ ----------- + * but uses 8bit upper/lower case information + * from the current display charset. + * It returns 0 if exact match. + */ +int strcasecomp8(const char *a, + const char *b) +{ + const char *p = a; + const char *q = b; + + for (; *p && *q; p++, q++) { + int diff = UPPER8(*p, *q); + + if (diff) + return diff; + } + if (*p) + return 1; /* p was longer than q */ + if (*q) + return -1; /* p was shorter than q */ + return 0; /* Exact match */ +} + +/* + * strncasecomp8 is a variant of strncasecomp (below) + * ------------- ------------ + * but uses 8bit upper/lower case information + * from the current display charset. + * It returns 0 if exact match. 
+ */ +int strncasecomp8(const char *a, + const char *b, + int n) +{ + const char *p = a; + const char *q = b; + + for (;; p++, q++) { + int diff; + + if (p == (a + n)) + return 0; /* Match up to n characters */ + if (!(*p && *q)) + return (*p - *q); + diff = UPPER8(*p, *q); + if (diff) + return diff; + } + /*NOTREACHED */ +} + +#ifndef VM /* VM has these already it seems */ +/* Strings of any length + * --------------------- + */ +int strcasecomp(const char *a, + const char *b) +{ + const char *p = a; + const char *q = b; + + for (; *p && *q; p++, q++) { + int diff = TOLOWER(*p) - TOLOWER(*q); + + if (diff) + return diff; + } + if (*p) + return 1; /* p was longer than q */ + if (*q) + return -1; /* p was shorter than q */ + return 0; /* Exact match */ +} + +/* With count limit + * ---------------- + */ +int strncasecomp(const char *a, + const char *b, + int n) +{ + const char *p = a; + const char *q = b; + + for (;; p++, q++) { + int diff; + + if (p == (a + n)) + return 0; /* Match up to n characters */ + if (!(*p && *q)) + return (*p - *q); + diff = TOLOWER(*p) - TOLOWER(*q); + if (diff) + return diff; + } + /*NOTREACHED */ +} +#endif /* VM */ + +#define end_component(p) (*(p) == '.' || *(p) == '\0') + +#ifdef DEBUG_ASTERISK +#define SHOW_ASTERISK CTRACE +#else +#define SHOW_ASTERISK(p) /* nothing */ +#endif + +#define SHOW_ASTERISK_NUM(a,b,c) \ + SHOW_ASTERISK((tfp, "test @%d, '%s' vs '%s' (%d)\n", __LINE__, a,b,c)) + +#define SHOW_ASTERISK_TXT(a,b,c) \ + SHOW_ASTERISK((tfp, "test @%d, '%s' vs '%s' %s\n", __LINE__, a,b,c)) + +/* + * Compare names as described in RFC 2818: ignore case, allow wildcards. + * Return zero on a match, nonzero on mismatch -TD + * + * From RFC 2818: + * Names may contain the wildcard character * which is considered to match any + * single domain name component or component fragment. E.g., *.a.com matches + * foo.a.com but not bar.foo.a.com. f*.com matches foo.com but not bar.com. 
+ */ +int strcasecomp_asterisk(const char *a, const char *b) +{ + const char *p; + int result = 0; + int done = FALSE; + + while (!result && !done) { + SHOW_ASTERISK_TXT(a, b, "main"); + if (*a == '*') { + p = b; + for (;;) { + SHOW_ASTERISK_TXT(a, p, "loop"); + if (end_component(p)) { + if (end_component(a + 1)) { + b = p - 1; + result = 0; + } else { + result = 1; + } + break; + } else if (strcasecomp_asterisk(a + 1, p)) { + ++p; + } else { + b = p - 1; + result = 0; /* found a match starting at 'p' */ + done = TRUE; + break; + } + } + SHOW_ASTERISK_NUM(a, b, result); + } else if (*b == '*') { + result = strcasecomp_asterisk(b, a); + SHOW_ASTERISK_NUM(a, b, result); + done = (result == 0); + } else if (*a == '\0' || *b == '\0') { + result = (*a != *b); + SHOW_ASTERISK_NUM(a, b, result); + break; + } else if (TOLOWER(UCH(*a)) != TOLOWER(UCH(*b))) { + result = 1; + SHOW_ASTERISK_NUM(a, b, result); + break; + } + ++a; + ++b; + } + return result; +} + +#ifdef DEBUG_ASTERISK +void mismatch_asterisk(void) +{ + /* *INDENT-OFF* */ + static struct { + const char *a; + const char *b; + int code; + } table[] = { + { "foo.bar", "*.*", 0 }, + { "foo.bar", "*.b*", 0 }, + { "foo.bar", "*.ba*", 0 }, + { "foo.bar", "*.bar*", 0 }, + { "foo.bar", "*.*bar*", 0 }, + { "foo.bar", "*.*.", 1 }, + { "foo.bar", "fo*.b*", 0 }, + { "*oo.bar", "fo*.b*", 0 }, + { "*oo.bar.com", "fo*.b*", 1 }, + { "*oo.bar.com", "fo*.b*m", 1 }, + { "*oo.bar.com", "fo*.b*.c*", 0 }, + }; + /* *INDENT-ON* */ + + unsigned n; + int code; + + CTRACE((tfp, "mismatch_asterisk testing\n")); + for (n = 0; n < TABLESIZE(table); ++n) { + CTRACE((tfp, "-------%d\n", n)); + code = strcasecomp_asterisk(table[n].a, table[n].b); + if (code != table[n].code) { + CTRACE((tfp, "mismatch_asterisk '%s' '%s' got %d, want %d\n", + table[n].a, table[n].b, code, table[n].code)); + } + } +} +#endif + +#ifdef NOT_ASCII + +/* Case-insensitive with ASCII collating sequence + * ---------------- + */ +int AS_casecomp(const char *p, + const 
char *q) +{ + int diff; + + for (;; p++, q++) { + if (!(*p && *q)) + return (UCH(*p) - UCH(*q)); + diff = TOASCII(TOLOWER(*p)) + - TOASCII(TOLOWER(*q)); + if (diff) + return diff; + } + /*NOTREACHED */ +} + +/* With count limit and ASCII collating sequence + * ---------------- + * AS_cmp uses n == -1 to compare indefinite length. + */ +int AS_ncmp(const char *p, + const char *q, + unsigned int n) +{ + const char *a = p; + int diff; + + for (; (unsigned) (p - a) < n; p++, q++) { + if (!(*p && *q)) + return (UCH(*p) - UCH(*q)); + diff = TOASCII(*p) + - TOASCII(*q); + if (diff) + return diff; + } + return 0; /* Match up to n characters */ +} +#endif /* NOT_ASCII */ + +/* Allocate a new copy of a string, and returns it +*/ +char *HTSACopy(char **dest, + const char *src) +{ + if (src != 0) { + if (src != *dest) { + size_t size = strlen(src) + 1; + + FREE(*dest); + *dest = (char *) malloc(size); + if (*dest == NULL) + outofmem(__FILE__, "HTSACopy"); + assert(*dest != NULL); + MemCpy(*dest, src, size); + } + } else { + FREE(*dest); + } + return *dest; +} + +/* String Allocate and Concatenate +*/ +char *HTSACat(char **dest, + const char *src) +{ + if (src && *src && (src != *dest)) { + if (*dest) { + size_t length = strlen(*dest); + + *dest = (char *) realloc(*dest, length + strlen(src) + 1); + if (*dest == NULL) + outofmem(__FILE__, "HTSACat"); + assert(*dest != NULL); + strcpy(*dest + length, src); + } else { + *dest = (char *) malloc(strlen(src) + 1); + if (*dest == NULL) + outofmem(__FILE__, "HTSACat"); + assert(*dest != NULL); + strcpy(*dest, src); + } + } + return *dest; +} + +/* optimized for heavily realloc'd strings, store length inside */ + +#define EXTRA_TYPE size_t /* type we use for length */ +#define EXTRA_SIZE sizeof(void *) /* alignment >= sizeof(EXTRA_TYPE) */ + +void HTSAFree_extra(char *s) +{ + free(s - EXTRA_SIZE); +} + +/* never shrink */ +char *HTSACopy_extra(char **dest, + const char *src) +{ + if (src != 0) { + size_t srcsize = strlen(src) + 1; + 
EXTRA_TYPE size = 0; + + if (*dest != 0) { + size = *(EXTRA_TYPE *) (void *) ((*dest) - EXTRA_SIZE); + } + if ((*dest == 0) || (size < srcsize)) { + FREE_extra(*dest); + size = srcsize * 2; /* x2 step */ + *dest = (char *) malloc(size + EXTRA_SIZE); + if (*dest == NULL) + outofmem(__FILE__, "HTSACopy_extra"); + assert(*dest != NULL); + *(EXTRA_TYPE *) (void *) (*dest) = size; + *dest += EXTRA_SIZE; + } + MemCpy(*dest, src, srcsize); + } else { + Clear_extra(*dest); + } + return *dest; +} + +/* Find next Field + * --------------- + * + * On entry, + * *pstr points to a string containig white space separated + * field, optionlly quoted. + * + * On exit, + * *pstr has been moved to the first delimiter past the + * field + * THE STRING HAS BEEN MUTILATED by a 0 terminator + * + * returns a pointer to the first field + */ +char *HTNextField(char **pstr) +{ + char *p = *pstr; + char *start; /* start of field */ + + while (*p && WHITE(*p)) + p++; /* Strip white space */ + if (!*p) { + *pstr = p; + return NULL; /* No first field */ + } + if (*p == '"') { /* quoted field */ + p++; + start = p; + for (; *p && *p != '"'; p++) { + if (*p == '\\' && p[1]) + p++; /* Skip escaped chars */ + } + } else { + start = p; + while (*p && !WHITE(*p)) + p++; /* Skip first field */ + } + if (*p) + *p++ = '\0'; + *pstr = p; + return start; +} + +/* Find next Token + * --------------- + * Finds the next token in a string + * On entry, + * *pstr points to a string to be parsed. + * delims lists characters to be recognized as delimiters. + * If NULL, default is white space "," ";" or "=". + * The word can optionally be quoted or enclosed with + * chars from bracks. + * Comments surrrounded by '(' ')' are filtered out + * unless they are specifically reqested by including + * ' ' or '(' in delims or bracks. + * bracks lists bracketing chars. Some are recognized as + * special, for those give the opening char. + * If NULL, defaults to <"> and "<" ">". 
+ * found points to location to fill with the ending delimiter + * found, or is NULL. + * + * On exit, + * *pstr has been moved to the first delimiter past the + * field + * THE STRING HAS BEEN MUTILATED by a 0 terminator + * found points to the delimiter found unless it was NULL. + * Returns a pointer to the first word or NULL on error + */ +char *HTNextTok(char **pstr, + const char *delims, + const char *bracks, + char *found) +{ + char *p = *pstr; + char *start = NULL; + BOOL get_blanks, skip_comments; + BOOL get_comments; + BOOL get_closing_char_too = FALSE; + char closer; + + if (isEmpty(pstr)) + return NULL; + if (!delims) + delims = " ;,="; + if (!bracks) + bracks = "<\""; + + get_blanks = (BOOL) (!strchr(delims, ' ') && !strchr(bracks, ' ')); + get_comments = (BOOL) (strchr(bracks, '(') != NULL); + skip_comments = (BOOL) (!get_comments && !strchr(delims, '(') && !get_blanks); +#define skipWHITE(c) (!get_blanks && WHITE(c)) + + while (*p && skipWHITE(*p)) + p++; /* Strip white space */ + if (!*p) { + *pstr = p; + if (found) + *found = '\0'; + return NULL; /* No first field */ + } + while (1) { + /* Strip white space and other delimiters */ + while (*p && (skipWHITE(*p) || strchr(delims, *p))) + p++; + if (!*p) { + *pstr = p; + if (found) + *found = *(p - 1); + return NULL; /* No field */ + } + + if (*p == '(' && (skip_comments || get_comments)) { /* Comment */ + int comment_level = 0; + + if (get_comments && !start) + start = p + 1; + for (; *p && (*p != ')' || --comment_level > 0); p++) { + if (*p == '(') + comment_level++; + else if (*p == '"') { /* quoted field within Comment */ + for (p++; *p && *p != '"'; p++) + if (*p == '\\' && *(p + 1)) + p++; /* Skip escaped chars */ + if (!*p) + break; /* (invalid) end of string found, leave */ + } + if (*p == '\\' && *(p + 1)) + p++; /* Skip escaped chars */ + } + if (get_comments) + break; + if (*p) + p++; + if (get_closing_char_too) { + if (!*p || (!strchr(bracks, *p) && strchr(delims, *p))) { + break; + } else 
+ get_closing_char_too = (BOOL) (strchr(bracks, *p) != NULL); + } + } else if (strchr(bracks, *p)) { /* quoted or bracketed field */ + switch (*p) { + case '<': + closer = '>'; + break; + case '[': + closer = ']'; + break; + case '{': + closer = '}'; + break; + case ':': + closer = ';'; + break; + default: + closer = *p; + } + if (!start) + start = ++p; + for (; *p && *p != closer; p++) + if (*p == '\\' && *(p + 1)) + p++; /* Skip escaped chars */ + if (get_closing_char_too) { + p++; + if (!*p || (!strchr(bracks, *p) && strchr(delims, *p))) { + break; + } else + get_closing_char_too = (BOOL) (strchr(bracks, *p) != NULL); + } else + break; /* kr95-10-9: needs to stop here */ + } else { /* Spool field */ + if (!start) + start = p; + while (*p && !skipWHITE(*p) && !strchr(bracks, *p) && + !strchr(delims, *p)) + p++; + if (*p && strchr(bracks, *p)) { + get_closing_char_too = TRUE; + } else { + if (*p == '(' && skip_comments) { + *pstr = p; + HTNextTok(pstr, NULL, "(", found); /* Advance pstr */ + *p = '\0'; + if (*pstr && **pstr) + (*pstr)++; + return start; + } + break; /* Got it */ + } + } + } + if (found) + *found = *p; + + if (*p) + *p++ = '\0'; + *pstr = p; + return start; +} + +static char *HTAlloc(char *ptr, size_t length) +{ + if (ptr != 0) + ptr = (char *) realloc(ptr, length); + else + ptr = (char *) malloc(length); + if (ptr == 0) + outofmem(__FILE__, "HTAlloc"); + assert(ptr != NULL); + return ptr; +} + +/* + * If SAVE_TIME_NOT_SPACE is defined, StrAllocVsprintf will hang on to + * its temporary string buffers instead of allocating and freeing them + * in each invocation. They only grow and never shrink, and won't be + * cleaned up on exit. - kw + */ +#if defined(_REENTRANT) || defined(_THREAD_SAFE) || defined(LY_FIND_LEAKS) +#undef SAVE_TIME_NOT_SPACE +#endif + +/* + * Replacement for sprintf, allocates buffer on the fly according to what's + * needed for its arguments. 
Unlike sprintf, this always concatenates to the + * destination buffer, so we do not have to provide both flavors. + */ +typedef enum { + Flags, + Width, + Prec, + Type, + Format +} PRINTF; + +#define VA_INTGR(type) ival = (int) va_arg((*ap), type) +#define VA_FLOAT(type) fval = (double) va_arg((*ap), type) +#define VA_POINT(type) pval = (char *) va_arg((*ap), type) + +#define NUM_WIDTH 10 /* allow for width substituted for "*" in "%*s" */ + /* also number of chars assumed to be needed in addition + to a given precision in floating point formats */ + +#define GROW_EXPR(n) (((n) * 3) / 2) +#define GROW_SIZE 256 + +PUBLIC_IF_FIND_LEAKS char *StrAllocVsprintf(char **pstr, + size_t dst_len, + const char *fmt, + va_list * ap) +{ +#ifdef HAVE_VASPRINTF + /* + * Use vasprintf() if we have it, since it is simplest. + */ + char *result = 0; + char *temp = 0; + + /* discard old destination if no length was given */ + if (pstr && !dst_len) { + if (*pstr) + FREE(*pstr); + } + + if (vasprintf(&temp, fmt, *ap) >= 0) { + if (dst_len != 0) { + size_t src_len = strlen(temp); + size_t new_len = dst_len + src_len + 1; + + result = HTAlloc(pstr ? *pstr : 0, new_len); + if (result != 0) { + strcpy(result + dst_len, temp); + } + (free) (temp); + } else { + result = temp; + mark_malloced(temp, strlen(temp)); + } + } + + if (pstr != 0) + *pstr = result; + + return result; +#else /* !HAVE_VASPRINTF */ + /* + * If vasprintf() is not available, this works - but does not implement + * the POSIX '$' formatting character which may be used in some of the + * ".po" files. 
+ */ +#ifdef SAVE_TIME_NOT_SPACE + static size_t tmp_len = 0; + static size_t fmt_len = 0; + static char *tmp_ptr = NULL; + static char *fmt_ptr = NULL; + +#else + size_t tmp_len = GROW_SIZE; + char *tmp_ptr = 0; + char *fmt_ptr; +#endif /* SAVE_TIME_NOT_SPACE */ + size_t have, need; + char *dst_ptr = *pstr; + const char *format = fmt; + + if (isEmpty(fmt)) + return 0; + + need = strlen(fmt) + 1; +#ifdef SAVE_TIME_NOT_SPACE + if (!fmt_ptr || fmt_len < need * NUM_WIDTH) { + fmt_ptr = HTAlloc(fmt_ptr, fmt_len = need * NUM_WIDTH); + } + if (!tmp_ptr || tmp_len < GROW_SIZE) { + tmp_ptr = HTAlloc(tmp_ptr, tmp_len = GROW_SIZE); + } +#else + if ((fmt_ptr = malloc(need * NUM_WIDTH)) == 0 + || (tmp_ptr = malloc(tmp_len)) == 0) { + outofmem(__FILE__, "StrAllocVsprintf"); + assert(fmt_ptr != NULL); + assert(tmp_ptr != NULL); + } +#endif /* SAVE_TIME_NOT_SPACE */ + + if (dst_ptr == 0) { + dst_ptr = HTAlloc(dst_ptr, have = GROW_SIZE + need); + } else { + have = strlen(dst_ptr) + 1; + need += dst_len; + if (have < need) + dst_ptr = HTAlloc(dst_ptr, have = GROW_SIZE + need); + } + + while (*fmt != '\0') { + if (*fmt == '%') { + static char dummy[] = ""; + PRINTF state = Flags; + char *pval = dummy; /* avoid const-cast */ + double fval = 0.0; + int done = FALSE; + int ival = 0; + int prec = -1; + int type = 0; + int used = 0; + int width = -1; + size_t f = 0; + + fmt_ptr[f++] = *fmt; + while (*++fmt != '\0' && !done) { + fmt_ptr[f++] = *fmt; + + if (isdigit(UCH(*fmt))) { + int num = *fmt - '0'; + + if (state == Flags && num != 0) + state = Width; + if (state == Width) { + if (width < 0) + width = 0; + width = (width * 10) + num; + } else if (state == Prec) { + if (prec < 0) + prec = 0; + prec = (prec * 10) + num; + } + } else if (*fmt == '*') { + VA_INTGR(int); + + if (state == Flags) + state = Width; + if (state == Width) { + width = ival; + } else if (state == Prec) { + prec = ival; + } + sprintf(&fmt_ptr[--f], "%d", ival); + f = strlen(fmt_ptr); + } else if (isalpha(UCH(*fmt))) 
{ + done = TRUE; + switch (*fmt) { + case 'Z': /* FALLTHRU */ + case 'h': /* FALLTHRU */ + case 'l': /* FALLTHRU */ + case 'L': /* FALLTHRU */ + done = FALSE; + type = *fmt; + break; + case 'o': /* FALLTHRU */ + case 'i': /* FALLTHRU */ + case 'd': /* FALLTHRU */ + case 'u': /* FALLTHRU */ + case 'x': /* FALLTHRU */ + case 'X': /* FALLTHRU */ + if (type == 'l') + VA_INTGR(long); + + else if (type == 'Z') + VA_INTGR(size_t); + + else + VA_INTGR(int); + + used = 'i'; + break; + case 'f': /* FALLTHRU */ + case 'e': /* FALLTHRU */ + case 'E': /* FALLTHRU */ + case 'g': /* FALLTHRU */ + case 'G': /* FALLTHRU */ + VA_FLOAT(double); + + used = 'f'; + break; + case 'c': + VA_INTGR(int); + + used = 'c'; + break; + case 's': + VA_POINT(char *); + + if (prec < 0) + prec = strlen(pval); + used = 's'; + break; + case 'p': + VA_POINT(void *); + + used = 'p'; + break; + case 'n': + VA_POINT(int *); + + used = 0; + break; + default: + CTRACE((tfp, "unknown format character '%c' in %s\n", + *fmt, format)); + break; + } + } else if (*fmt == '.') { + state = Prec; + } else if (*fmt == '%') { + done = TRUE; + used = '%'; + } + } + fmt_ptr[f] = '\0'; + + if (prec > 0) { + switch (used) { + case 'f': + if (width < prec + NUM_WIDTH) + width = prec + NUM_WIDTH; + /* FALLTHRU */ + case 'i': + /* FALLTHRU */ + case 'p': + if (width < prec + 2) + width = prec + 2; /* leading sign/space/zero, "0x" */ + break; + case 'c': + break; + case '%': + break; + default: + if (width < prec) + width = prec; + break; + } + } + if (width >= (int) tmp_len) { + tmp_len = GROW_EXPR(tmp_len + width); + tmp_ptr = HTAlloc(tmp_ptr, tmp_len); + } + + switch (used) { + case 'i': + case 'c': + sprintf(tmp_ptr, fmt_ptr, ival); + break; + case 'f': + sprintf(tmp_ptr, fmt_ptr, fval); + break; + default: + sprintf(tmp_ptr, fmt_ptr, pval); + break; + } + need = dst_len + strlen(tmp_ptr) + 1; + if (need >= have) { + dst_ptr = HTAlloc(dst_ptr, have = GROW_EXPR(need)); + } + strcpy(dst_ptr + dst_len, tmp_ptr); + dst_len += 
strlen(tmp_ptr); + } else { + if ((dst_len + 2) >= have) { + dst_ptr = HTAlloc(dst_ptr, (have += GROW_SIZE)); + } + dst_ptr[dst_len++] = *fmt++; + } + } + +#ifndef SAVE_TIME_NOT_SPACE + FREE(tmp_ptr); + FREE(fmt_ptr); +#endif + dst_ptr[dst_len] = '\0'; + if (pstr) + *pstr = dst_ptr; + return (dst_ptr); +#endif /* HAVE_VASPRINTF */ +} +#undef SAVE_TIME_NOT_SPACE + +/* + * Replacement for sprintf, allocates buffer on the fly according to what's + * needed for its arguments. Unlike sprintf, this always concatenates to the + * destination buffer. + */ +/* Note: if making changes, also check the memory tracking version + * LYLeakHTSprintf in LYLeaks.c. - kw */ +#ifdef HTSprintf /* if hidden by LYLeaks stuff */ +#undef HTSprintf +#endif +char *HTSprintf(char **pstr, const char *fmt,...) +{ + char *result = 0; + size_t inuse = 0; + va_list ap; + + LYva_start(ap, fmt); + { + if (pstr != 0 && *pstr != 0) + inuse = strlen(*pstr); + result = StrAllocVsprintf(pstr, inuse, fmt, &ap); + } + va_end(ap); + + return (result); +} + +/* + * Replacement for sprintf, allocates buffer on the fly according to what's + * needed for its arguments. Like sprintf, this always resets the destination + * buffer. + */ +/* Note: if making changes, also check the memory tracking version + * LYLeakHTSprintf0 in LYLeaks.c. - kw */ +#ifdef HTSprintf0 /* if hidden by LYLeaks stuff */ +#undef HTSprintf0 +#endif +char *HTSprintf0(char **pstr, const char *fmt,...) +{ + char *result = 0; + va_list ap; + + LYva_start(ap, fmt); + { + result = StrAllocVsprintf(pstr, (size_t) 0, fmt, &ap); + } + va_end(ap); + + return (result); +} + +/* + * Returns a quoted or escaped form of the given parameter, suitable for use in + * a command string. 
+ */ +#if USE_QUOTED_PARAMETER +#define S_QUOTE '\'' +#define D_QUOTE '"' +char *HTQuoteParameter(const char *parameter) +{ + size_t i; + size_t last; + size_t n = 0; + size_t quoted = 0; + char *result; + + if (parameter == 0) + parameter = ""; + + last = strlen(parameter); + for (i = 0; i < last; ++i) + if (strchr("\\&#$^*?(){}<>\"';`|", parameter[i]) != 0 + || isspace(UCH(parameter[i]))) + ++quoted; + + result = (char *) malloc(last + 5 * quoted + 3); + if (result == NULL) + outofmem(__FILE__, "HTQuoteParameter"); + + assert(result != NULL); + + n = 0; +#if (USE_QUOTED_PARAMETER == 1) + /* + * Only double-quotes are used in Win32/DOS -TD + */ + if (quoted) + result[n++] = D_QUOTE; + for (i = 0; i < last; i++) { + result[n++] = parameter[i]; + } + if (quoted) + result[n++] = D_QUOTE; +#else + if (quoted) + result[n++] = S_QUOTE; + for (i = 0; i < last; i++) { + if (parameter[i] == S_QUOTE) { + result[n++] = S_QUOTE; + result[n++] = D_QUOTE; + result[n++] = parameter[i]; + result[n++] = D_QUOTE; + result[n++] = S_QUOTE; + } else { + /* Note: No special handling of other characters, including + backslash, since we are constructing a single-quoted string! + Backslash has no special escape meaning within those for sh + and compatible shells, so trying to escape a backslash by + doubling it is unnecessary and would be interpreted by the + shell as an additional data character. - kw 2000-05-02 + */ + result[n++] = parameter[i]; + } + } + if (quoted) + result[n++] = S_QUOTE; +#endif + result[n] = '\0'; + return result; +} +#endif + +#define HTIsParam(string) ((string[0] == '%' && string[1] == 's')) + +/* + * Returns the number of "%s" tokens in a system command-template. 
/*
 * Returns the number of "%s" tokens in a system command-template.
 *
 * A NULL template is treated as empty, so the result is 0.  Tokens cannot
 * overlap, so the search resumes two characters after each match.
 */
int HTCountCommandArgs(const char *command)
{
    int number = 0;

    if (command != 0) {
        const char *scan = command;

        while ((scan = strstr(scan, "%s")) != 0) {
            number++;
            scan += 2;
        }
    }
    return number;
}

/*
 * Returns a pointer into the given string after the given parameter number,
 * i.e., just past the number'th "%s" token.  If the template holds fewer
 * tokens than that, the result points at its terminating null.  With a
 * number of zero or less the template itself is returned.
 *
 * 'command' must not be NULL.
 */
static const char *HTAfterCommandArg(const char *command,
                                     int number)
{
    while (number > 0) {
        const char *token = strstr(command, "%s");

        if (token == 0)
            return command + strlen(command);
        command = token + 2;
        number--;
    }
    return command;
}

/*
 * Like HTAddParam, but the parameter may be an environment variable, which we
 * will expand and append.  Do this only for things like the command-verb,
 * where we obtain the parameter from the user's configuration.  Any quoting
 * required for the environment variable has to be done within its value, e.g.,
 *
 *	setenv EDITOR 'xvile -name "No such class"'
 *
 * This is useful only when we quote parameters, of course.
 *
 * 'number' is the 1-based index of the "%s" token being filled; for the first
 * one any previous *result is discarded.  The template text between the
 * previous token and this one is appended to *result, followed by the
 * parameter, which is not quoted.  Nothing is appended when the template has
 * no such token, when 'number' is not positive, or when 'command' is NULL.
 * A NULL parameter is appended as an empty string.
 */
#if USE_QUOTED_PARAMETER
void HTAddXpand(char **result,
                const char *command,
                int number,
                const char *parameter)
{
    const char *last;
    const char *next;

    if (number <= 0 || command == 0)
        return;

    last = HTAfterCommandArg(command, number - 1);
    if (number <= 1) {
        FREE(*result);
    }
    if (parameter == 0)
        parameter = "";

    next = strstr(last, "%s");
    if (next == 0)
        return;

    if (next != last) {
        size_t len = ((size_t) (next - last)
                      + ((*result != 0)
                         ? strlen(*result)
                         : 0));

        /* append the whole remainder, then cut it off where the token starts */
        HTSACat(result, last);
        (*result)[len] = 0;
    }
    HTSACat(result, parameter);
    /* *result is still NULL if nothing has been appended so far */
    CTRACE((tfp, "PARAM-EXP:%s\n", (*result != 0) ? *result : "(null)"));
}
#endif /* USE_QUOTED_PARAMETER */

/*
 * Append string to a system command that we are constructing, without quoting.
 * We're given the index of the newest parameter we're processing.  Zero
 * indicates none, so a value of '1' indicates that we copy from the beginning
 * of the command string up to the first parameter, substitute the quoted
 * parameter and return the result.
 *
 * Parameters are substituted at "%s" tokens, like printf.
 */
Other printf-style + * tokens are not substituted; they are passed through without change. + */ +void HTAddToCmd(char **result, + const char *command, + int number, + const char *string) +{ + if (number > 0) { + const char *last = HTAfterCommandArg(command, number - 1); + const char *next = last; + + if (number <= 1) { + FREE(*result); + } + if (string == 0) + string = ""; + while (next[0] != 0) { + if (HTIsParam(next)) { + if (next != last) { + size_t len = ((size_t) (next - last) + + ((*result != 0) + ? strlen(*result) + : 0)); + + HTSACat(result, last); + (*result)[len] = 0; + } + HTSACat(result, string); + CTRACE((tfp, "PARAM-ADD:%s\n", *result)); + return; + } + next++; + } + } +} + +/* + * Append string-parameter to a system command that we are constructing. The + * string is a complete parameter (which is a necessary assumption so we can + * quote it properly). + */ +void HTAddParam(char **result, + const char *command, + int number, + const char *parameter) +{ + if (number > 0) { +#if USE_QUOTED_PARAMETER + char *quoted = HTQuoteParameter(parameter); + + HTAddToCmd(result, command, number, quoted); + FREE(quoted); +#else + HTAddToCmd(result, command, number, parameter); +#endif + } +} + +/* + * Append the remaining command-string to a system command (compare with + * HTAddParam). Any remaining "%s" tokens are copied as empty strings. + */ +void HTEndParam(char **result, + const char *command, + int number) +{ + const char *last; + int count; + + count = HTCountCommandArgs(command); + if (count < number) + number = count; + last = HTAfterCommandArg(command, number); + if (last[0] != 0) { + HTSACat(result, last); + } + CTRACE((tfp, "PARAM-END:%s\n", *result)); +} + +/* Binary-strings (may have embedded nulls). Some modules (HTGopher) assume + * there is a null on the end, anyway. + */ + +/* (Re)allocate a bstring, e.g., to increase its buffer size for ad hoc + * operations. 
+ */ +void HTSABAlloc(bstring **dest, int len) +{ + if (*dest == 0) + *dest = typecalloc(bstring); + + if ((*dest)->len != len) { + (*dest)->str = typeRealloc(char, (*dest)->str, len); + + (*dest)->len = len; + } +} + +/* Allocate a new bstring, and return it. +*/ +void HTSABCopy(bstring **dest, const char *src, + int len) +{ + bstring *t; + unsigned need = (unsigned) (len + 1); + + CTRACE2(TRACE_BSTRING, + (tfp, "HTSABCopy(%p, %p, %d)\n", + (void *) dest, (const void *) src, len)); + HTSABFree(dest); + if (src) { + if (TRACE_BSTRING) { + CTRACE((tfp, "=== %4d:", len)); + trace_bstring2(src, len); + CTRACE((tfp, "\n")); + } + if ((t = (bstring *) malloc(sizeof(bstring))) == NULL) + outofmem(__FILE__, "HTSABCopy"); + + assert(t != NULL); + + if ((t->str = typeMallocn(char, need)) == NULL) + outofmem(__FILE__, "HTSABCopy"); + + assert(t->str != NULL); + + MemCpy(t->str, src, len); + t->len = len; + t->str[t->len] = '\0'; + *dest = t; + } + if (TRACE_BSTRING) { + CTRACE((tfp, "=> %4d:", BStrLen(*dest))); + trace_bstring(*dest); + CTRACE((tfp, "\n")); + } +} + +/* + * Initialize with a null-terminated string (discards the null). + */ +void HTSABCopy0(bstring **dest, const char *src) +{ + HTSABCopy(dest, src, (int) strlen(src)); +} + +/* + * Append a block of memory to a bstring. 
+ */ +void HTSABCat(bstring **dest, const char *src, + int len) +{ + bstring *t = *dest; + + CTRACE2(TRACE_BSTRING, + (tfp, "HTSABCat(%p, %p, %d)\n", + (void *) dest, (const void *) src, len)); + if (src) { + unsigned need = (unsigned) (len + 1); + + if (TRACE_BSTRING) { + CTRACE((tfp, "=== %4d:", len)); + trace_bstring2(src, len); + CTRACE((tfp, "\n")); + } + if (t) { + unsigned length = (unsigned) t->len + need; + + t->str = typeRealloc(char, t->str, length); + } else { + if ((t = typecalloc(bstring)) == NULL) + outofmem(__FILE__, "HTSACat"); + + assert(t != NULL); + + t->str = typeMallocn(char, need); + } + if (t->str == NULL) + outofmem(__FILE__, "HTSACat"); + + assert(t->str != NULL); + + MemCpy(t->str + t->len, src, len); + t->len += len; + t->str[t->len] = '\0'; + *dest = t; + } + if (TRACE_BSTRING) { + CTRACE((tfp, "=> %4d:", BStrLen(*dest))); + trace_bstring(*dest); + CTRACE((tfp, "\n")); + } +} + +/* + * Append a null-terminated string (discards the null). + */ +void HTSABCat0(bstring **dest, const char *src) +{ + HTSABCat(dest, src, (int) strlen(src)); +} + +/* + * Compare two bstring's for equality + */ +BOOL HTSABEql(bstring *a, bstring *b) +{ + unsigned len_a = (unsigned) ((a != 0) ? a->len : 0); + unsigned len_b = (unsigned) ((b != 0) ? b->len : 0); + + if (len_a == len_b) { + if (len_a == 0 + || MemCmp(a->str, b->str, a->len) == 0) + return TRUE; + } + return FALSE; +} + +/* + * Deallocate a bstring. + */ +void HTSABFree(bstring **ptr) +{ + if (*ptr != NULL) { + FREE((*ptr)->str); + FREE(*ptr); + *ptr = NULL; + } +} + +/* + * Use this function to perform formatted sprintf's onto the end of a bstring. + * The bstring may contain embedded nulls; the formatted portions must not. + */ +bstring *HTBprintf(bstring **pstr, const char *fmt,...) 
+{ + bstring *result = 0; + char *temp = 0; + va_list ap; + + LYva_start(ap, fmt); + { + temp = StrAllocVsprintf(&temp, (size_t) 0, fmt, &ap); + if (non_empty(temp)) { + HTSABCat(pstr, temp, (int) strlen(temp)); + } + FREE(temp); + result = *pstr; + } + va_end(ap); + + return (result); +} + +/* + * Write binary-data to the logfile, making it safe for most editors to view. + * That is most, since we do not restrict line-length. Nulls and other + * non-printing characters are addressed. + */ +void trace_bstring2(const char *text, + int size) +{ + int n; + + if (text != 0) { + for (n = 0; n < size; ++n) { + int ch = UCH(text[n]); + + switch (ch) { + case '\\': + fputs("\\\\", tfp); + break; + case '\r': + fputs("\\r", tfp); + break; + case '\t': + fputs("\\t", tfp); + break; + case '\f': + fputs("\\f", tfp); + break; + default: + if (isprint(ch) || isspace(ch)) { + fputc(ch, tfp); + } else { + fprintf(tfp, "\\%03o", ch); + } + break; + } + } + } +} + +void trace_bstring(bstring *data) +{ + trace_bstring2(BStrData(data), BStrLen(data)); +} diff --git a/WWW/Library/Implementation/HTString.h b/WWW/Library/Implementation/HTString.h new file mode 100644 index 00000000..86cd9382 --- /dev/null +++ b/WWW/Library/Implementation/HTString.h @@ -0,0 +1,162 @@ +/* + * $LynxId: HTString.h,v 1.37 2012/02/07 23:41:25 tom Exp $ + * String handling for libwww + * STRINGS + * + * Case-independent string comparison and allocations with copies etc + */ +#ifndef HTSTRING_H +#define HTSTRING_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif /* HTUTILS_H */ + +#ifdef __cplusplus +extern "C" { +#endif + extern const char *HTLibraryVersion; /* String for help screen etc */ + +/* + * EBCDIC string comparison using ASCII collating sequence + */ +#ifdef NOT_ASCII + extern int AS_casecomp(const char *a, const char *b); + extern int AS_ncmp(const char *a, const char *b, unsigned int n); + +#define AS_cmp( a, b ) ( AS_ncmp( ( a ), ( b ), -1 ) ) + +#else +#define AS_casecomp( a, b ) ( strcasecomp( 
( a ), ( b ) ) ) +#define AS_ncmp( a, b, c ) ( StrNCmp( ( a ), ( b ), ( c ) ) ) +#define AS_cmp strcmp + +#endif /* NOT_ASCII */ + +#define StrNCat(a,b,c) strncat((a),(b),(size_t)(c)) +#define StrNCpy(a,b,c) strncpy((a),(b),(size_t)(c)) +#define StrNCmp(a,b,c) strncmp((a),(b),(size_t)(c)) + +#define MemCpy(a,b,c) memcpy((a),(b),(size_t)(c)) +#define MemCmp(a,b,c) memcmp((a),(b),(size_t)(c)) + + /* + * Case-insensitive string comparison + * + * The usual routines (comp instead of cmp) had some problem. + */ + extern int strcasecomp(const char *a, const char *b); + extern int strncasecomp(const char *a, const char *b, int n); + + extern int strcasecomp8(const char *a, const char *b); + extern int strncasecomp8(const char *a, const char *b, int n); + + extern int strcasecomp_asterisk(const char *a, const char *b); + + /* + * strcasecomp8 and strncasecomp8 are variants of strcasecomp and + * strncasecomp, but use 8bit upper/lower case information from the + * current display charset + */ + + /* + * Malloced string manipulation + */ +#define StrAllocCopy(dest, src) HTSACopy (&(dest), src) +#define StrAllocCat(dest, src) HTSACat (&(dest), src) + extern char *HTSACopy(char **dest, const char *src); + extern char *HTSACat(char **dest, const char *src); + + /* + * optimized for heavily realloc'd strings in temp objects + */ +#define StrAllocCopy_extra(dest, src) HTSACopy_extra (&(dest), src) +#define FREE_extra(x) {if (x != NULL) {HTSAFree_extra(x); x = NULL;}} +#define Clear_extra(x) {if (x != NULL) {*x = '\0';}} + extern char *HTSACopy_extra(char **dest, const char *src); + extern void HTSAFree_extra(char *s); + + /* + * Next word or quoted string + */ + extern char *HTNextField(char **pstr); + + /* A more general parser - kw */ + extern char *HTNextTok(char **pstr, + const char *delims, const char *bracks, char *found); + + extern char *HTSprintf(char **pstr, const char *fmt,...) GCC_PRINTFLIKE(2,3); + extern char *HTSprintf0(char **pstr, const char *fmt,...) 
GCC_PRINTFLIKE(2,3); + +#if defined(LY_FIND_LEAKS) /* private otherwise */ + extern char *StrAllocVsprintf(char **pstr, + size_t len, + const char *fmt, + va_list * ap); +#endif + +#if defined(__CYGWIN__) +#define USE_QUOTED_PARAMETER 2 /* single and double-quoting */ +#elif defined(DOSPATH) +#define USE_QUOTED_PARAMETER 1 /* double-quoting only */ +#elif (defined(VMS) || defined(__EMX__)) +#define USE_QUOTED_PARAMETER 0 /* no quoting */ +#else +#define USE_QUOTED_PARAMETER 2 /* single and double-quoting */ +#endif + +#if USE_QUOTED_PARAMETER + extern char *HTQuoteParameter(const char *parameter); + extern void HTAddXpand(char **result, const char *command, int number, const char *parameter); + +#else +#define HTQuoteParameter(parameter) parameter /* simplify ifdef'ing */ +#define HTAddXpand(result,command,number,parameter) HTAddParam(result,command,number,parameter) +#endif + + extern int HTCountCommandArgs(const char *command); + extern void HTAddToCmd(char **result, const char *command, int number, const char *string); + extern void HTAddParam(char **result, const char *command, int number, const char *parameter); + extern void HTEndParam(char **result, const char *command, int number); + +/* Force an option, with leading blanks, to be appended without quoting them */ +#define HTOptParam(result, command, number, parameter) HTSACat(result, parameter) + +/* Binary copy and concat */ + typedef struct { + char *str; + int len; + } bstring; + + extern void HTSABAlloc(bstring **dest, int len); + extern void HTSABCopy(bstring **dest, const char *src, int len); + extern void HTSABCopy0(bstring **dest, const char *src); + extern void HTSABCat(bstring **dest, const char *src, int len); + extern void HTSABCat0(bstring **dest, const char *src); + extern BOOL HTSABEql(bstring *a, bstring *b); + extern void HTSABFree(bstring **ptr); + +#define BStrLen(s) (((s) != 0) ? (s)->len : 0) +#define BStrData(s) (((s) != 0) ? 
(s)->str : 0) + +#define BINEQ(a,b) (HTSABEql(a,b)) /* like STREQ() */ + +#define isBEmpty(p) ((p) == 0 || BStrLen(p) == 0) + +#define BStrAlloc(d,n) HTSABAlloc( &(d), n) +#define BStrCopy(d,s) HTSABCopy( &(d), BStrData(s), BStrLen(s)) +#define BStrCopy0(d,s) HTSABCopy0( &(d), s) +#define BStrCopy1(d,s,n) HTSABCopy( &(d), s, n) +#define BStrCat(d,s) HTSABCat( &(d), BStrData(s), BStrLen(s)) +#define BStrCat0(d,s) HTSABCat0( &(d), s) +#define BStrFree(d) HTSABFree( &(d)) + + extern bstring *HTBprintf(bstring **pstr, const char *fmt,...) GCC_PRINTFLIKE(2,3); + + extern void trace_bstring(bstring *data); + extern void trace_bstring2(const char *text, int size); + +#ifdef __cplusplus +} +#endif +#endif /* HTSTRING_H */ diff --git a/WWW/Library/Implementation/HTStyle.c b/WWW/Library/Implementation/HTStyle.c new file mode 100644 index 00000000..54c0bc52 --- /dev/null +++ b/WWW/Library/Implementation/HTStyle.c @@ -0,0 +1,378 @@ +/* + * $LynxId: HTStyle.c,v 1.16 2009/11/27 13:01:48 tom Exp $ + * + * Style Implementation for Hypertext HTStyle.c + * ================================== + * + * Styles allow the translation between a logical property + * of a piece of text and its physical representation. + * + * A StyleSheet is a collection of styles, defining the + * translation necessary to + * represent a document. It is a linked list of styles. 
+ */ + +#include <HTUtils.h> +#include <HTStyle.h> + +#include <LYLeaks.h> + +/* Create a new style +*/ +HTStyle *HTStyleNew(void) +{ + HTStyle *self = typecalloc(HTStyle); + + if (self == NULL) + outofmem(__FILE__, "HTStyleNew"); + return self; +} + +/* Create a new style with a name +*/ +HTStyle *HTStyleNewNamed(const char *name) +{ + HTStyle *self = HTStyleNew(); + + StrAllocCopy(self->w_name, name); + self->id = -1; /* <0 */ + return self; +} + +/* Free a style +*/ +HTStyle *HTStyleFree(HTStyle *self) +{ + FREE(self->w_name); + FREE(self->w_SGMLTag); + FREE(self); + return NULL; +} + +#ifdef SUPPRESS /* Only on the NeXT */ +/* Read a style from a stream (without its name) + * -------------------------- + * + * Reads a style with paragraph information from a stream. + * The style name is not read or written by these routines. + */ +#define NONE_STRING "(None)" +#define HTStream NXStream + +HTStyle *HTStyleRead(HTStyle *style, HTStream *stream) +{ + char myTag[STYLE_NAME_LENGTH]; + char fontName[STYLE_NAME_LENGTH]; + NXTextStyle *p; + int tab; + int gotpara; /* flag: have we got a paragraph definition? 
*/ + + NXScanf(stream, "%s%s%f%d", + myTag, + fontName, + &style->fontSize, + &gotpara); + if (gotpara) { + if (!style->paragraph) { + style->paragraph = malloc(sizeof(*(style->paragraph))); + if (!style->paragraph) + outofmem(__FILE__, "HTStyleRead"); + style->paragraph->tabs = 0; + } + p = style->paragraph; + NXScanf(stream, "%f%f%f%f%hd%f%f%hd", + &p->indent1st, + &p->indent2nd, + &p->lineHt, + &p->descentLine, + &p->alignment, + &style->spaceBefore, + &style->spaceAfter, + &p->numTabs); + FREE(p->tabs); + p->tabs = malloc(p->numTabs * sizeof(p->tabs[0])); + if (!p->tabs) + outofmem(__FILE__, "HTStyleRead"); + for (tab = 0; tab < p->numTabs; tab++) { + NXScanf(stream, "%hd%f", + &p->tabs[tab].kind, + &p->tabs[tab].x); + } + } else { /* No paragraph */ + FREE(style->paragraph); + } /* if no paragraph */ + StrAllocCopy(style->SGMLTag, myTag); + if (strcmp(fontName, NONE_STRING) == 0) + style->font = 0; + else + style->font =[Font newFont: fontName size:style->fontSize]; + return NULL; +} + +/* Write a style to a stream in a compatible way +*/ +HTStyle *HTStyleWrite(HTStyle *style, NXStream * stream) +{ + int tab; + NXTextStyle *p = style->paragraph; + + NXPrintf(stream, "%s %s %f %d\n", + style->SGMLTag, + style->font ?[style->font name] : NONE_STRING, + style->fontSize, + p != 0); + + if (p) { + NXPrintf(stream, "\t%f %f %f %f %d %f %f\t%d\n", + p->indent1st, + p->indent2nd, + p->lineHt, + p->descentLine, + p->alignment, + style->spaceBefore, + style->spaceAfter, + p->numTabs); + + for (tab = 0; tab < p->numTabs; tab++) + NXPrintf(stream, "\t%d %f\n", + p->tabs[tab].kind, + p->tabs[tab].x); + } + return style; +} + +/* Write a style to stdout for diagnostics +*/ +HTStyle *HTStyleDump(HTStyle *style) +{ + int tab; + NXTextStyle *p = style->paragraph; + + printf(STYLE_DUMP_FONT, + style, + style->name, + style->SGMLTag, + [style->font name], + style->fontSize); + if (p) { + printf(STYLE_DUMP_IDENT, + p->indent1st, + p->indent2nd, + p->lineHt, + p->descentLine); + 
printf(STYLE_DUMP_ALIGN, + p->alignment, + p->numTabs, + style->spaceBefore, + style->spaceAfter); + + for (tab = 0; tab < p->numTabs; tab++) { + printf(STYLE_DUMP_TAB, + p->tabs[tab].kind, + p->tabs[tab].x); + } + printf("\n"); + } /* if paragraph */ + return style; +} +#endif /* SUPPRESS */ + +/* StyleSheet Functions + * ==================== + */ + +/* Searching for styles: +*/ +HTStyle *HTStyleNamed(HTStyleSheet *self, const char *name) +{ + HTStyle *scan; + + for (scan = self->styles; scan; scan = scan->next) + if (0 == strcmp(GetHTStyleName(scan), name)) + return scan; + CTRACE((tfp, "StyleSheet: No style named `%s'\n", name)); + return NULL; +} + +#ifdef NEXT_SUPRESS /* Not in general common code */ + +HTStyle *HTStyleMatching(HTStyleSheet *self, HTStyle *style) +{ + HTStyle *scan; + + for (scan = self->styles; scan; scan = scan->next) + if (scan->paragraph == para) + return scan; + return NULL; +} + +/* Find the style which best fits a given run + * ------------------------------------------ + * + * This heuristic is used for guessing the style for a run of + * text which has been pasted in. In order, we try: + * + * A style whose paragraph structure is actually used by the run. 
+ * A style matching in font + * A style matching in paragraph style exactly + * A style matching in paragraph to a degree + */ + +HTStyle *HTStyleForRun(HTStyleSheet *self, NXRun * run) +{ + HTStyle *scan; + HTStyle *best = 0; + int bestMatch = 0; + NXTextStyle *rp = run->paraStyle; + + for (scan = self->styles; scan; scan = scan->next) + if (scan->paragraph == run->paraStyle) + return scan; /* Exact */ + + for (scan = self->styles; scan; scan = scan->next) { + NXTextStyle *sp = scan->paragraph; + + if (sp) { + int match = 0; + + if (sp->indent1st == rp->indent1st) + match = match + 1; + if (sp->indent2nd == rp->indent2nd) + match = match + 2; + if (sp->lineHt == rp->lineHt) + match = match + 1; + if (sp->numTabs == rp->numTabs) + match = match + 1; + if (sp->alignment == rp->alignment) + match = match + 3; + if (scan->font == run->font) + match = match + 10; + if (match > bestMatch) { + best = scan; + bestMatch = match; + } + } + } + CTRACE((tfp, "HTStyleForRun: Best match for style is %d out of 18\n", + bestMatch)); + return best; +} +#endif /* NEXT_SUPRESS */ + +/* Add a style to a sheet + * ---------------------- + */ +HTStyleSheet *HTStyleSheetAddStyle(HTStyleSheet *self, HTStyle *style) +{ + style->next = 0; /* The style will go on the end */ + if (!self->styles) { + self->styles = style; + } else { + HTStyle *scan; + + for (scan = self->styles; scan->next; scan = scan->next) ; /* Find end */ + scan->next = style; + } + return self; +} + +/* Remove the given object from a style sheet if it exists +*/ +HTStyleSheet *HTStyleSheetRemoveStyle(HTStyleSheet *self, HTStyle *style) +{ + if (self->styles == style) { + self->styles = style->next; + return self; + } else { + HTStyle *scan; + + for (scan = self->styles; scan; scan = scan->next) { + if (scan->next == style) { + scan->next = style->next; + return self; + } + } + } + return NULL; +} + +/* Create new style sheet +*/ + +HTStyleSheet *HTStyleSheetNew(void) +{ + HTStyleSheet *self = typecalloc(HTStyleSheet); + 
+ if (self == NULL) + outofmem(__FILE__, "HTStyleSheetNew"); + return self; +} + +/* Free off a style sheet pointer +*/ +HTStyleSheet *HTStyleSheetFree(HTStyleSheet *self) +{ + HTStyle *style; + + while ((style = self->styles) != 0) { + self->styles = style->next; + HTStyleFree(style); + } + FREE(self); + return NULL; +} + +/* Read a stylesheet from a typed stream + * ------------------------------------- + * + * Reads a style sheet from a stream. If new styles have the same names + * as existing styles, they replace the old ones without changing the ids. + */ + +#ifdef NEXT_SUPRESS /* Only on the NeXT */ +HTStyleSheet *HTStyleSheetRead(HTStyleSheet *self, NXStream * stream) +{ + int numStyles; + int i; + HTStyle *style; + char styleName[80]; + + NXScanf(stream, " %d ", &numStyles); + CTRACE((tfp, "Stylesheet: Reading %d styles\n", numStyles)); + for (i = 0; i < numStyles; i++) { + NXScanf(stream, "%s", styleName); + style = HTStyleNamed(self, styleName); + if (!style) { + style = HTStyleNewNamed(styleName); + (void) HTStyleSheetAddStyle(self, style); + } + (void) HTStyleRead(style, stream); + if (TRACE) + HTStyleDump(style); + } + return self; +} + +/* Write a stylesheet to a typed stream + * ------------------------------------ + * + * Writes a style sheet to a stream. 
+ */ + +HTStyleSheet *HTStyleSheetWrite(HTStyleSheet *self, NXStream * stream) +{ + int numStyles = 0; + HTStyle *style; + + for (style = self->styles; style; style = style->next) + numStyles++; + NXPrintf(stream, "%d\n", numStyles); + + CTRACE((tfp, "StyleSheet: Writing %d styles\n", numStyles)); + for (style = self->styles; style; style = style->next) { + NXPrintf(stream, "%s ", style->name); + (void) HTStyleWrite(style, stream); + } + return self; +} +#endif /* NEXT_SUPRESS */ diff --git a/WWW/Library/Implementation/HTStyle.h b/WWW/Library/Implementation/HTStyle.h new file mode 100644 index 00000000..0e51e39c --- /dev/null +++ b/WWW/Library/Implementation/HTStyle.h @@ -0,0 +1,241 @@ +/* + * $LynxId: HTStyle.h,v 1.17 2011/06/06 09:12:01 tom Exp $ + HTStyle: Style management for libwww + STYLE DEFINITION FOR HYPERTEXT + + Styles allow the translation between a logical property of a piece of text + and its physical representation. + + A StyleSheet is a collection of styles, defining the translation necessary + to represent a document. It is a linked list of styles. + +Overriding this module + + Why is the style structure declared in the HTStyle.h module, instead of + having the user browser define the structure, and the HTStyle routines just + use sizeof() for copying? + + It's not obvious whether HTStyle.c should be common code. It's useful to + have common code for loading style sheets, especially if the movement toward + standard style sheets gets going. + + If it IS common code, then both the hypertext object and HTStyle.c must know + the structure of a style, so HTStyle.h is a suitable place to put that. + HTStyle.c has to be compiled with a knowledge of the + + It we take it out of the library, then of course HTStyle could be declared + as an undefined structure. The only references to it are in the + structure-flattening code HTML.c and HTPlain.c, which only use + HTStypeNamed(). 
+ + You can in any case override this function in your own code, which will + prevent the HTStyle from being loaded. You will be able to redefine your + style structure in this case without problems, as no other moule needs to + know it. + + */ +#ifndef HTStyle_H +#define HTStyle_H + +#include <HTAnchor.h> + +typedef long int HTFont; /* Dummy definition instead */ + +#ifdef NeXT_suppressed +#include <appkit/appkit.h> +typedef NXCoord HTCoord; + +#define HTParagraphStyle NXTextStyle +#define HTCoord NXCoord +typedef struct _color { + float grey; + int RGBColor; +} HTColor; + +#else + +typedef int HTCoord; /* changed from float to int - kw */ + +typedef struct _HTParagraphStyle { + HTCoord left_indent; /* @@@@ junk! etc etc */ +} HTParagraphStyle; + +typedef int HTColor; /* Sorry about the US spelling! */ + +#endif + +#ifdef __cplusplus +extern "C" { +#endif +#define STYLE_NAME_LENGTH 80 /* @@@@@@@@@@@ */ + typedef struct { + short kind; /* only NX_LEFTTAB implemented */ + HTCoord position; /* x coordinate for stop */ + } HTTabStop; + +/* The Style Structure + * ------------------- + */ + + typedef struct _HTStyle { + +/* Style management information +*/ + struct _HTStyle *next; /* Link for putting into stylesheet */ + char *w_name; /* Style name */ + const char *c_name; /* Style name */ + int id; /* equivalent of name, for speed */ + char *w_SGMLTag; /* Tag name to start */ + const char *c_SGMLTag; /* Tag name to start */ + +/* Character attributes (a la NXRun) +*/ + HTFont font; /* Font id */ + HTCoord fontSize; /* The size of font, not independent */ + HTColor color; /* text gray of current run */ + int superscript; /* superscript (-sub) in points */ + + HTAnchor *anchor; /* Anchor id if any, else zero */ + +/* Paragraph Attribtes (a la NXTextStyle) +*/ + HTCoord indent1st; /* how far first line in paragraph is + * indented */ + HTCoord leftIndent; /* how far second line is indented */ + HTCoord rightIndent; /* (Missing from NeXT version */ + short alignment; /* 
quad justification */ + HTCoord lineHt; /* line height */ + HTCoord descentLine; /* descender bottom from baseline */ + const HTTabStop *tabs; /* array of tab stops, 0 terminated */ + + BOOL wordWrap; /* Yes means wrap at space not char */ + BOOL freeFormat; /* Yes means \n is just white space */ + HTCoord spaceBefore; /* Omissions from NXTextStyle */ + HTCoord spaceAfter; + int paraFlags; /* Paragraph flags, bits as follows: */ + +#define PARA_KEEP 1 /* Do not break page within this paragraph */ +#define PARA_WITH_NEXT 2 /* Do not break page after this paragraph */ + +#define HT_JUSTIFY 0 /* For alignment */ +#define HT_LEFT 1 +#define HT_RIGHT 2 +#define HT_CENTER 3 + + } HTStyle; + +#define GetHTStyleName(p) ((p)->w_name ? (p)->w_name : (p)->c_name) +#define GetHTStyleSGML(p) ((p)->w_SGMLTag ? (p)->w_SGMLTag : (p)->c_SGMLTag) + +#define HTStyleInit( \ + next, name, SGML_tag, \ + font, fontsize, color, superscript, \ + anchor, indent1st, leftIndent, rightIndent, \ + alignment, lineHt, descentLine, \ + tabs, wordWrap, freeFormat, spaceBefore, spaceAfter, paraFlags) \ + { \ + next, NULL, #name, ST_##name, NULL, SGML_tag, \ + font, fontsize, color, superscript, \ + anchor, indent1st, leftIndent, rightIndent, \ + alignment, lineHt, descentLine, \ + tabs, wordWrap, freeFormat, spaceBefore, spaceAfter, paraFlags } + +#define HT_ALIGN_NONE (-1) + +/* Style functions: +*/ + extern HTStyle *HTStyleNew(void); + extern HTStyle *HTStyleNewNamed(const char *name); + extern HTStyle *HTStyleFree(HTStyle *self); + +#ifdef SUPRESS + extern HTStyle *HTStyleRead(HTStyle *self, HTStream *stream); + extern HTStyle *HTStyleWrite(HTStyle *self, HTStream *stream); +#endif +/* Style Sheet + * ----------- + */ + typedef struct _HTStyleSheet { + const char *name; + HTStyle *styles; + } HTStyleSheet; + +/* Stylesheet functions: +*/ + extern HTStyleSheet *HTStyleSheetNew(void); + extern HTStyleSheet *HTStyleSheetFree(HTStyleSheet *self); + extern HTStyle *HTStyleNamed(HTStyleSheet *self, 
const char *name); + extern HTStyle *HTStyleForParagraph(HTStyleSheet *self, HTParagraphStyle * paraStyle); + extern HTStyle *HTStyleMatching(HTStyleSheet *self, HTStyle *style); + +/* extern HTStyle * HTStyleForRun (HTStyleSheet *self, NXRun * run); */ + extern HTStyleSheet *HTStyleSheetAddStyle(HTStyleSheet *self, HTStyle *style); + extern HTStyleSheet *HTStyleSheetRemoveStyle(HTStyleSheet *self, HTStyle *style); + +#ifdef SUPPRESS + extern HTStyleSheet *HTStyleSheetRead(HTStyleSheet *self, HTStream *stream); + extern HTStyleSheet *HTStyleSheetWrite(HTStyleSheet *self, HTStream *stream); +#endif +#define CLEAR_POINTER ((void *)-1) /* Pointer value means "clear me" */ + +/* DefaultStyle.c */ + extern HTStyleSheet *DefaultStyle(HTStyle ***result_array); + +/* enum, use this instead of HTStyle name comparisons */ + enum HTStyle_Enum { + ST_Normal = 0, + ST_DivCenter, + ST_DivLeft, + ST_DivRight, + ST_Banner, + ST_Blockquote, + ST_Bq, + ST_Footnote, + ST_List, + ST_List1, + ST_List2, + ST_List3, + ST_List4, + ST_List5, + ST_List6, + ST_Menu, + ST_Menu1, + ST_Menu2, + ST_Menu3, + ST_Menu4, + ST_Menu5, + ST_Menu6, + ST_Glossary, + ST_Glossary1, + ST_Glossary2, + ST_Glossary3, + ST_Glossary4, + ST_Glossary5, + ST_Glossary6, + ST_GlossaryCompact, + ST_GlossaryCompact1, + ST_GlossaryCompact2, + ST_GlossaryCompact3, + ST_GlossaryCompact4, + ST_GlossaryCompact5, + ST_GlossaryCompact6, + ST_Example, + ST_Preformatted, + ST_Listing, + ST_Address, + ST_Note, + ST_Heading1, + ST_Heading2, + ST_Heading3, + ST_Heading4, + ST_Heading5, + ST_Heading6, + ST_HeadingCenter, + ST_HeadingLeft, + ST_HeadingRight + }; + +#ifdef __cplusplus +} +#endif +#endif /* HTStyle_H */ diff --git a/WWW/Library/Implementation/HTTCP.c b/WWW/Library/Implementation/HTTCP.c new file mode 100644 index 00000000..2723422c --- /dev/null +++ b/WWW/Library/Implementation/HTTCP.c @@ -0,0 +1,2182 @@ +/* + * $LynxId: HTTCP.c,v 1.107 2012/02/09 12:36:45 tom Exp $ + * + * Generic Communication Code HTTCP.c + * 
========================== + * + * This code is in common between client and server sides. + * + * 16 Jan 92 TBL Fix strtol() undefined on CMU Mach. + * 25 Jun 92 JFG Added DECNET option through TCP socket emulation. + * 13 Sep 93 MD Added correct return of vmserrorno for HTInetStatus. + * Added decoding of vms error message for MULTINET. + * 7-DEC-1993 Bjorn S. Nilsson, ALEPH, CERN, VMS UCX ioctl() changes + * (done of Mosaic) + * 19 Feb 94 Danny Mayer Added Bjorn Fixes to Lynx version + * 7 Mar 94 Danny Mayer Added Fix UCX version for full domain name + * 20 May 94 Andy Harper Added support for CMU TCP/IP transport + * 17 Nov 94 Andy Harper Added support for SOCKETSHR transport + * 16 Jul 95 S. Bjorndahl added kluge to deal with LIBCMU bug + */ + +#include <HTUtils.h> +#include <HTParse.h> +#include <HTAlert.h> +#include <HTTCP.h> +#include <LYGlobalDefs.h> /* added for no_suspend */ +#include <LYUtils.h> + +#ifdef NSL_FORK +#include <signal.h> +#include <www_wait.h> +#endif /* NSL_FORK */ + +#ifdef HAVE_RESOLV_H +#include <resolv.h> +#endif + +#ifdef __DJGPP__ +#include <netdb.h> +#endif /* __DJGPP__ */ + +#define LYNX_ADDRINFO struct addrinfo +#define LYNX_HOSTENT struct hostent + +#define OK_HOST(p) ((p) != 0 && ((p)->h_length) != 0) + +#ifdef SVR4_BSDSELECT +int BSDselect(int nfds, + fd_set * readfds, + fd_set * writefds, + fd_set * exceptfds, + struct timeval *select_timeout); + +#ifdef select +#undef select +#endif /* select */ +#define select BSDselect +#ifdef SOCKS +#ifdef Rselect +#undef Rselect +#endif /* Rselect */ +#define Rselect BSDselect +#endif /* SOCKS */ +#endif /* SVR4_BSDSELECT */ + +#include <LYLeaks.h> + +/* + * Module-Wide variables + */ +static char *hostname = NULL; /* The name of this host */ + +/* + * PUBLIC VARIABLES + */ +#ifdef SOCKS +unsigned long socks_bind_remoteAddr; /* for long Rbind */ +#endif /* SOCKS */ + +/* Encode INET status (as in sys/errno.h) inet_status() + * ------------------ + * + * On entry, + * where gives a 
description of what caused the error + * global errno gives the error number in the Unix way. + * + * On return, + * returns a negative status in the Unix way. + */ + +#ifdef DECL_SYS_ERRLIST +extern char *sys_errlist[]; /* see man perror on cernvax */ +extern int sys_nerr; +#endif /* DECL_SYS_ERRLIST */ + +#ifdef __DJGPP__ +static int ResolveYield(void) +{ + return HTCheckForInterrupt()? 0 : 1; +} +#endif + +#if defined(VMS) && defined(UCX) +/* + * A routine to mimic the ioctl function for UCX. + * Bjorn S. Nilsson, 25-Nov-1993. Based on an example in the UCX manual. + */ +#include <HTioctl.h> + +int HTioctl(int d, + int request, + int *argp) +{ + int sdc, status; + unsigned short fun, iosb[4]; + char *p5, *p6; + struct comm { + int command; + char *addr; + } ioctl_comm; + struct it2 { + unsigned short len; + unsigned short opt; + struct comm *addr; + } ioctl_desc; + + if ((sdc = vaxc$get_sdc(d)) == 0) { + set_errno(EBADF); + return -1; + } + ioctl_desc.opt = UCX$C_IOCTL; + ioctl_desc.len = sizeof(struct comm); + + ioctl_desc.addr = &ioctl_comm; + if (request & IOC_OUT) { + fun = IO$_SENSEMODE; + p5 = 0; + p6 = (char *) &ioctl_desc; + } else { + fun = IO$_SETMODE; + p5 = (char *) &ioctl_desc; + p6 = 0; + } + ioctl_comm.command = request; + ioctl_comm.addr = (char *) argp; + status = sys$qiow(0, sdc, fun, iosb, 0, 0, 0, 0, 0, 0, p5, p6); + if (!(status & 01)) { + set_errno(status); + return -1; + } + if (!(iosb[0] & 01)) { + set_errno(iosb[0]); + return -1; + } + return 0; +} +#endif /* VMS && UCX */ + +#define MY_FORMAT "TCP: Error %d in `SOCKET_ERRNO' after call to %s() failed.\n\t%s\n" + /* third arg is transport/platform specific */ + +/* Report Internet Error + * --------------------- + */ +int HTInetStatus(const char *where) +{ + int status; + int saved_errno = errno; + +#ifdef VMS +#ifdef MULTINET + SOCKET_ERRNO = vmserrno; +#endif /* MULTINET */ +#endif /* VMS */ + +#ifdef VM + CTRACE((tfp, MY_FORMAT, SOCKET_ERRNO, where, + "(Error number not 
translated)")); /* What Is the VM equiv? */ +#define ER_NO_TRANS_DONE +#endif /* VM */ + +#ifdef VMS +#ifdef MULTINET + CTRACE((tfp, MY_FORMAT, SOCKET_ERRNO, where, + vms_errno_string())); +#else + CTRACE((tfp, MY_FORMAT, SOCKET_ERRNO, where, + ((SOCKET_ERRNO > 0 && SOCKET_ERRNO <= 65) ? + strerror(SOCKET_ERRNO) : "(Error number not translated)"))); +#endif /* MULTINET */ +#define ER_NO_TRANS_DONE +#endif /* VMS */ + +#ifdef HAVE_STRERROR + CTRACE((tfp, MY_FORMAT, SOCKET_ERRNO, where, + strerror(SOCKET_ERRNO))); +#define ER_NO_TRANS_DONE +#endif /* HAVE_STRERROR */ + +#ifndef ER_NO_TRANS_DONE + CTRACE((tfp, MY_FORMAT, SOCKET_ERRNO, where, + (SOCKET_ERRNO < sys_nerr ? + sys_errlist[SOCKET_ERRNO] : "Unknown error"))); +#endif /* !ER_NO_TRANS_DONE */ + +#ifdef VMS +#ifndef MULTINET + CTRACE((tfp, + " Unix error number (SOCKET_ERRNO) = %ld dec\n", + SOCKET_ERRNO)); + CTRACE((tfp, + " VMS error (vaxc$errno) = %lx hex\n", + vaxc$errno)); +#endif /* MULTINET */ +#endif /* VMS */ + + set_errno(saved_errno); + +#ifdef VMS + /* + * uerrno and errno happen to be zero if vmserrno <> 0 + */ +#ifdef MULTINET + status = -vmserrno; +#else + status = -vaxc$errno; +#endif /* MULTINET */ +#else + status = -SOCKET_ERRNO; +#endif /* VMS */ + return status; +} + +/* Parse a cardinal value parse_cardinal() + * ---------------------- + * + * On entry, + * *pp points to first character to be interpreted, terminated by + * non 0:9 character. + * *pstatus points to status already valid + * maxvalue gives the largest allowable value. 
+ * + * On exit, + * *pp points to first unread character + * *pstatus points to status updated iff bad + */ +unsigned int HTCardinal(int *pstatus, + char **pp, + unsigned int max_value) +{ + unsigned int n; + + if ((**pp < '0') || (**pp > '9')) { /* Null string is error */ + *pstatus = -3; /* No number where one expected */ + return 0; + } + + n = 0; + while ((**pp >= '0') && (**pp <= '9')) + n = n * 10 + (unsigned) (*((*pp)++) - '0'); + + if (n > max_value) { + *pstatus = -4; /* Cardinal outside range */ + return 0; + } + + return n; +} + +#ifndef DECNET /* Function only used below for a trace message */ +/* Produce a string for an Internet address + * ---------------------------------------- + * + * On exit, + * returns a pointer to a static string which must be copied if + * it is to be kept. + */ +const char *HTInetString(SockA * soc_in) +{ +#ifdef INET6 + static char hostbuf[MAXHOSTNAMELEN]; + + getnameinfo((struct sockaddr *) soc_in, + SOCKADDR_LEN(soc_in), + hostbuf, (socklen_t) sizeof(hostbuf), + NULL, 0, + NI_NUMERICHOST); + return hostbuf; +#else + static char string[20]; + + sprintf(string, "%d.%d.%d.%d", + (int) *((unsigned char *) (&soc_in->sin_addr) + 0), + (int) *((unsigned char *) (&soc_in->sin_addr) + 1), + (int) *((unsigned char *) (&soc_in->sin_addr) + 2), + (int) *((unsigned char *) (&soc_in->sin_addr) + 3)); + return string; +#endif /* INET6 */ +} +#endif /* !DECNET */ + +/* Check whether string is a valid Internet hostname - kw + * ------------------------------------------------- + * + * Checks whether + * - contains only valid chars for domain names (actually, the + * restrictions are somewhat relaxed), + * - no leading dots or empty segments, + * - no segment starts with '-' or '+' [this protects telnet command], + * - max. length of dot-separated segment <= 63 (RFC 1034,1035), + * - total length <= 254 (if it ends with dot) or 253 (otherwise) + * [an interpretation of RFC 1034,1035, although RFC 1123 + * suggests 255 as limit - kw]. 
+ * + * Note: user (before '@') and port (after ':') components from + * host part of URL should be already stripped (if appropriate) + * from the input string. + * + * On exit, + * returns 1 if valid, otherwise 0. + */ +BOOL valid_hostname(char *name) +{ + int i = 1, iseg = 0; + char *cp = name; + + if (!(name && *name)) + return NO; + for (; (*cp && i <= 253); cp++, i++) { + if (*cp == '.') { + if (iseg == 0) { + return NO; + } else { + iseg = 0; + continue; + } + } else if (iseg == 0 && (*cp == '-' || *cp == '+')) { + return NO; + } else if (++iseg > 63) { + return NO; + } + if (!isalnum(UCH(*cp)) && + *cp != '-' && *cp != '_' && + *cp != '$' && *cp != '+') { + return NO; + } + } + return (BOOL) (*cp == '\0' || (*cp == '.' && iseg != 0 && cp[1] == '\0')); +} + +#ifdef NSL_FORK +/* + * Function to allow us to be killed with a normal signal (not + * SIGKILL), but don't go through normal libc exit() processing, which + * would screw up parent's stdio. -BL + */ +static void quench(int sig GCC_UNUSED) +{ + _exit(2); +} +#endif /* NSL_FORK */ + +int lynx_nsl_status = HT_OK; + +#define DEBUG_HOSTENT /* disable in case of problems */ +#define DEBUG_HOSTENT_CHILD /* for NSL_FORK, may screw up trace file */ + +/* + * Two auxiliary functions for name lookup and LYNX_HOSTENT. + * + * dump_hostent - dumps the contents of a LYNX_HOSTENT to the + * trace log or stderr, including all pointer values, strings, and + * addresses, in a format inspired by gdb's print format. 
- kw + */ +static void dump_hostent(const char *msgprefix, + const LYNX_HOSTENT *phost) +{ + if (TRACE) { + int i; + char **pcnt; + + CTRACE((tfp, "%s: %p ", msgprefix, (const void *) phost)); + if (phost) { + CTRACE((tfp, "{ h_name = %p", phost->h_name)); + if (phost->h_name) { + CTRACE((tfp, " \"%s\",", phost->h_name)); + } else { + CTRACE((tfp, ",")); + } + CTRACE((tfp, "\n\t h_aliases = %p", (void *) phost->h_aliases)); + if (phost->h_aliases) { + CTRACE((tfp, " {")); + for (pcnt = phost->h_aliases; *pcnt; pcnt++) { + CTRACE((tfp, "%s %p \"%s\"", + (pcnt == phost->h_aliases ? " " : ", "), + *pcnt, *pcnt)); + } + CTRACE((tfp, "%s0x0 },\n\t", + (*phost->h_aliases ? ", " : " "))); + } else { + CTRACE((tfp, ",\n\t")); + } + CTRACE((tfp, " h_addrtype = %d,", phost->h_addrtype)); + CTRACE((tfp, " h_length = %d,\n\t", phost->h_length)); + CTRACE((tfp, " h_addr_list = %p", (void *) phost->h_addr_list)); + if (phost->h_addr_list) { + CTRACE((tfp, " {")); + for (pcnt = phost->h_addr_list; *pcnt; pcnt++) { + CTRACE((tfp, "%s %p", + (pcnt == phost->h_addr_list ? "" : ","), + *pcnt)); + for (i = 0; i < phost->h_length; i++) { + CTRACE((tfp, "%s%d%s", (i == 0 ? " \"" : "."), + (int) *((unsigned char *) (*pcnt) + i), + (i + 1 == phost->h_length ? "\"" : ""))); + } + } + if (*phost->h_addr_list) { + CTRACE((tfp, ", 0x0 } }")); + } else { + CTRACE((tfp, " 0x0 } }")); + } + } else { + CTRACE((tfp, "}")); + } + } + CTRACE((tfp, "\n")); + fflush(tfp); + } +} + +/* + * fill_rehostent - copies as much as possible relevant content from + * the LYNX_HOSTENT pointed to by phost to the char buffer given + * by rehostent, subject to maximum output length rehostentsize, + * following pointers and building self-contained output which can be + * cast to a LYNX_HOSTENT. - kw + * See also description of LYGetHostByName. + */ +#if defined(NSL_FORK) || defined(_WINDOWS_NSL) + +#define REHOSTENT_SIZE 128 /* not bigger than pipe buffer! 
*/ + +typedef struct { + LYNX_HOSTENT h; + char rest[REHOSTENT_SIZE]; +} AlignedHOSTENT; + +static size_t fill_rehostent(char *rehostent, + size_t rehostentsize, + const LYNX_HOSTENT *phost) +{ + AlignedHOSTENT *data = (AlignedHOSTENT *) (void *) rehostent; + int num_addrs = 0; + int num_aliases = 0; + char **pcnt; + char *p_next_char; + char **p_next_charptr; + size_t name_len = 0; + size_t required_per_addr; + size_t curlen = sizeof(LYNX_HOSTENT); + size_t available = rehostentsize - curlen; + size_t chk_available, mem_this_alias, required_this_alias; + int i_addr, i_alias; + + if (!phost) + return 0; + required_per_addr = (size_t) phost->h_length + sizeof(char *); + + if (phost->h_addr_list) + available -= sizeof(phost->h_addr_list[0]); + if (phost->h_aliases) + available -= sizeof(phost->h_aliases[0]); + if (phost->h_name) + available--; + if (phost->h_addr_list) { + if (phost->h_addr_list[0]) { + if (available >= required_per_addr) { + num_addrs++; + available -= required_per_addr; + } + } + } + if (phost->h_name) { + name_len = strlen(phost->h_name); + if (available >= name_len) { + available -= name_len; + } else { + name_len = 0; + } + } + if (num_addrs) { + for (pcnt = phost->h_addr_list + 1; *pcnt; pcnt++) { + if (available >= required_per_addr) { + num_addrs++; + available -= required_per_addr; + } else { + break; + } + } + } + chk_available = available; + if (phost->h_aliases) { + for (pcnt = phost->h_aliases; *pcnt; pcnt++) { + required_this_alias = sizeof(phost->h_aliases[0]) + + strlen(*pcnt) + 1; + if (chk_available >= required_this_alias) { + num_aliases++; + chk_available -= required_this_alias; + } + } + } + + data->h.h_addrtype = phost->h_addrtype; + data->h.h_length = phost->h_length; + p_next_charptr = (char **) (void *) (rehostent + curlen); + p_next_char = rehostent + curlen; + if (phost->h_addr_list) + p_next_char += (size_t) (num_addrs + 1) * sizeof(phost->h_addr_list[0]); + if (phost->h_aliases) + p_next_char += (size_t) (num_aliases + 1) 
* sizeof(phost->h_aliases[0]); + + if (phost->h_addr_list) { + data->h.h_addr_list = p_next_charptr; + for (pcnt = phost->h_addr_list, i_addr = 0; + i_addr < num_addrs; + pcnt++, i_addr++) { + MemCpy(p_next_char, *pcnt, sizeof(phost->h_addr_list[0])); + *p_next_charptr++ = p_next_char; + p_next_char += sizeof(phost->h_addr_list[0]); + } + *p_next_charptr = NULL; + } else { + data->h.h_addr_list = NULL; + } + + if (phost->h_name) { + data->h.h_name = p_next_char; + if (name_len) { + strcpy(p_next_char, phost->h_name); + p_next_char += name_len + 1; + } else { + *p_next_char++ = '\0'; + } + } else { + data->h.h_name = NULL; + } + + if (phost->h_aliases) { + data->h.h_aliases = p_next_charptr; + for (pcnt = phost->h_aliases, i_alias = 0; + (*pcnt && i_alias < num_addrs); + pcnt++, i_alias++) { + mem_this_alias = strlen(*pcnt) + 1; + required_this_alias = sizeof(phost->h_aliases[0]) + + mem_this_alias; + if (available >= required_this_alias) { + i_alias++; + available -= required_this_alias; + strcpy(p_next_char, *pcnt); + *p_next_charptr++ = p_next_char; + p_next_char += mem_this_alias; + } + p_next_char += sizeof(phost->h_aliases[0]); + } + *p_next_charptr = NULL; + } else { + data->h.h_aliases = NULL; + } + curlen = (size_t) (p_next_char - (char *) rehostent); + return curlen; +} + +/* + * This chunk of code is used in both win32 and cygwin. 
+ */ +#if defined(_WINDOWS_NSL) +static LYNX_HOSTENT *gbl_phost; /* Pointer to host - See netdb.h */ + +#if !(defined(__CYGWIN__) && defined(NSL_FORK)) +static int donelookup; + +static unsigned long __stdcall _fork_func(void *arg) +{ + const char *host = (const char *) arg; + static AlignedHOSTENT aligned_full_rehostent; + char *rehostent = (char *) &aligned_full_rehostent; + size_t rehostentlen = 0; + +#ifdef SH_EX + unsigned long addr; + + addr = (unsigned long) inet_addr(host); + if (addr != INADDR_NONE) + gbl_phost = gethostbyaddr((char *) &addr, sizeof(addr), AF_INET); + else + gbl_phost = gethostbyname(host); +#else + gbl_phost = gethostbyname(host); +#endif + + if (gbl_phost) { + rehostentlen = fill_rehostent(rehostent, + (size_t) REHOSTENT_SIZE, + gbl_phost); + if (rehostentlen == 0) { + gbl_phost = (LYNX_HOSTENT *) NULL; + } else { + gbl_phost = (LYNX_HOSTENT *) rehostent; + } + } + + donelookup = TRUE; + return (unsigned long) (gbl_phost); +} +#endif /* __CYGWIN__ */ +#endif /* _WINDOWS_NSL */ +#endif /* NSL_FORK */ + +#ifndef HAVE_H_ERRNO +#undef h_errno +#define h_errno my_errno +static int my_errno; + +#else /* we do HAVE_H_ERRNO: */ +#ifndef h_errno /* there may be a macro as well as the extern data */ +extern int h_errno; +#endif +#endif + +/* + * Even though it is a small amount, we cannot count on reading the whole + * struct via a pipe in one read -TD + */ +#ifdef NSL_FORK +static unsigned readit(int fd, char *buffer, size_t length) +{ + unsigned result = 0; + + while (length != 0) { + unsigned got = (unsigned) read(fd, buffer, length); + + if ((int) got > 0) { + result += got; + buffer += got; + length -= got; + } else { + break; + } + } + return result; +} +#endif /* NSL_FORK */ + +/* Resolve an internet hostname, like gethostbyname + * ------------------------------------------------ + * + * On entry, + * str points to the given host name, not numeric address, + * without colon or port number. 
+ * + * On exit, + * returns a pointer to a LYNX_HOSTENT in static storage, + * or NULL in case of error or user interruption. + * + * The interface is intended to be exactly the same as for (Unix) + * gethostbyname(), except for the following: + * + * If NSL_FORK is not used, the result of gethostbyname is returned + * directly. Otherwise: + * All lists, addresses, and strings referred to by pointers in the + * returned struct are located, together with the returned struct + * itself, in a buffer of size REHOSTENT_SIZE. If not everything fits, + * some info is omitted, but the function is careful to still return + * a valid structure, without truncating strings; it tries to return, + * in order of decreasing priority, the first address (h_addr_list[0]), the + * official name (h_name), the additional addresses, then alias names. + * + * If NULL is returned, the reason is made available in the global + * variable lynx_nsl_status, with one of the following values: + * HT_INTERRUPTED Interrupted by user + * HT_NOT_ACCEPTABLE Hostname detected as invalid + * (also sets h_errno) + * HT_H_ERRNO_VALID An error occurred, and h_errno holds + * an appropriate value + * HT_ERROR Resolver error, reason not known + * HT_INTERNAL Internal error + */ +LYNX_HOSTENT *LYGetHostByName(char *str) +{ + char *host = str; + +#ifdef NSL_FORK + /* for transfer of result between from child to parent: */ + static AlignedHOSTENT aligned_full_rehostent; + + /* + * We could define rehosten directly as a static char + * rehostent[REHOSTENT_SIZE], but the indirect approach via the above + * struct should automatically take care of alignment requirements. 
+ * Note that, in addition, + * - this must be static, as we shall return a pointer to it which must + * remain valid, and + * - we have to use the same rehostent in the child process as in the + * parent (its address in the parent's address space must be the same as + * in the child's, otherwise the internal pointers built by the child's + * call to fill_rehostent would be invalid when seen by the parent). -kw + */ + void *rehostent = (void *) &aligned_full_rehostent; + + /* for transfer of status from child to parent: */ + struct _statuses { + size_t rehostentlen; + int h_length; + int child_errno; /* sometimes useful to pass this on */ + int child_h_errno; + BOOL h_errno_valid; + } statuses; + + size_t rehostentlen = 0; +#endif /* NSL_FORK */ + + LYNX_HOSTENT *result_phost = NULL; + +#ifdef __DJGPP__ + _resolve_hook = ResolveYield; +#endif + + if (!str) { + CTRACE((tfp, "LYGetHostByName: Can't parse `NULL'.\n")); + lynx_nsl_status = HT_INTERNAL; + return NULL; + } + CTRACE((tfp, "LYGetHostByName: parsing `%s'.\n", str)); + + /* Could disable this if all our callers already check - kw */ + if (HTCheckForInterrupt()) { + CTRACE((tfp, "LYGetHostByName: INTERRUPTED for '%s'.\n", str)); + lynx_nsl_status = HT_INTERRUPTED; + return NULL; + } + + if (!valid_hostname(host)) { + lynx_nsl_status = HT_NOT_ACCEPTABLE; +#ifdef NO_RECOVERY +#ifdef _WINDOWS + WSASetLastError(NO_RECOVERY); +#else + h_errno = NO_RECOVERY; +#endif +#endif + return NULL; + } +#ifdef MVS /* Outstanding problem with crash in MVS gethostbyname */ + CTRACE((tfp, "LYGetHostByName: Calling gethostbyname(%s)\n", host)); +#endif /* MVS */ + + CTRACE_FLUSH(tfp); /* so child messages will not mess up parent log */ + + lynx_nsl_status = HT_INTERNAL; /* should be set to something else below */ + +#ifdef NSL_FORK + statuses.h_errno_valid = NO; + /* + * Start block for fork-based gethostbyname() with checks for interrupts. 
+ * - Tom Zerucha (tz@execpc.com) & FM + */ + { + int got_rehostent = 0; + +#if HAVE_SIGACTION + sigset_t old_sigset; + sigset_t new_sigset; +#endif + /* + * Pipe, child pid, status buffers, start time, select() control + * variables. + */ + int fpid, waitret; + int pfd[2], selret; + unsigned readret; + +#ifdef HAVE_TYPE_UNIONWAIT + union wait waitstat; + +#else + int waitstat = 0; +#endif + time_t start_time = time((time_t *) 0); + fd_set readfds; + struct timeval one_second; + long dns_patience = 30; /* how many seconds will we wait for DNS? */ + int child_exited = 0; + + /* + * Reap any children that have terminated since last time through. + * This might include children that we killed, then waited with WNOHANG + * before they were actually ready to be reaped. (Should be max of 1 + * in this state, but the loop is safe if waitpid() is implemented + * correctly: returns 0 when children exist but none have exited; -1 + * with errno == ECHILD when no children.) -BL + */ + do { + waitret = waitpid(-1, 0, WNOHANG); + } while (waitret > 0 || (waitret == -1 && errno == EINTR)); + waitret = 0; + + IGNORE_RC(pipe(pfd)); + +#if HAVE_SIGACTION + /* + * Attempt to prevent a rare situation where the child could execute + * the Lynx signal handlers because it gets killed before it even has a + * chance to reset its handlers, resulting in bogus 'Exiting via + * interrupt' message and screen corruption or worse. + * Should that continue to be reported, for systems without + * sigprocmask(), we need to find a different solutions for those. 
- + * kw 19990430 + */ + sigemptyset(&new_sigset); + sigaddset(&new_sigset, SIGTERM); + sigaddset(&new_sigset, SIGINT); +#ifndef NOSIGHUP + sigaddset(&new_sigset, SIGHUP); +#endif /* NOSIGHUP */ +#ifdef SIGTSTP + sigaddset(&new_sigset, SIGTSTP); +#endif /* SIGTSTP */ +#ifdef SIGWINCH + sigaddset(&new_sigset, SIGWINCH); +#endif /* SIGWINCH */ + sigprocmask(SIG_BLOCK, &new_sigset, &old_sigset); +#endif /* HAVE_SIGACTION */ + + if ((fpid = fork()) == 0) { + LYNX_HOSTENT *phost; /* Pointer to host - See netdb.h */ + + /* + * Child - for the long call. + * + * Make sure parent can kill us at will. -BL + */ + (void) signal(SIGTERM, quench); + + /* + * Also make sure the child does not run one of the signal handlers + * that may have been installed by Lynx if one of those signals + * occurs. For example we don't want the child to remove temp + * files on ^C, let the parent deal with that. - kw + */ + (void) signal(SIGINT, quench); +#ifndef NOSIGHUP + (void) signal(SIGHUP, quench); +#endif /* NOSIGHUP */ +#ifdef SIGTSTP + if (no_suspend) + (void) signal(SIGTSTP, SIG_IGN); + else + (void) signal(SIGTSTP, SIG_DFL); +#endif /* SIGTSTP */ +#ifdef SIGWINCH + (void) signal(SIGWINCH, SIG_IGN); +#endif /* SIGWINCH */ +#ifndef __linux__ +#ifndef DOSPATH + signal(SIGBUS, SIG_DFL); +#endif /* DOSPATH */ +#endif /* !__linux__ */ + signal(SIGSEGV, SIG_DFL); + signal(SIGILL, SIG_DFL); + +#if HAVE_SIGACTION + /* Restore signal mask to whatever it was before the fork. -kw */ + sigprocmask(SIG_SETMASK, &old_sigset, NULL); +#endif /* HAVE_SIGACTION */ + + /* + * Child won't use read side. 
-BL + */ + close(pfd[0]); +#ifdef HAVE_H_ERRNO + /* to detect cases when it doesn't get set although it should */ + h_errno = -2; +#endif + set_errno(0); + phost = gethostbyname(host); + statuses.child_errno = errno; + statuses.child_h_errno = h_errno; +#ifdef HAVE_H_ERRNO + statuses.h_errno_valid = YES; +#endif +#ifdef MVS + CTRACE((tfp, "LYGetHostByName: gethostbyname() returned %d\n", phost)); +#endif /* MVS */ + +#ifdef DEBUG_HOSTENT_CHILD + dump_hostent("CHILD gethostbyname", phost); +#endif + if (OK_HOST(phost)) { + rehostentlen = fill_rehostent(rehostent, + (size_t) REHOSTENT_SIZE, + phost); +#ifdef DEBUG_HOSTENT_CHILD + dump_hostent("CHILD fill_rehostent", (LYNX_HOSTENT *) rehostent); +#endif + } + if (rehostentlen <= sizeof(LYNX_HOSTENT) || + !OK_HOST((LYNX_HOSTENT *) rehostent)) { + rehostentlen = 0; + statuses.h_length = 0; + } else { + statuses.h_length = ((LYNX_HOSTENT *) rehostent)->h_length; +#ifdef HAVE_H_ERRNO + if (h_errno == -2) /* success, but h_errno unchanged? */ + statuses.h_errno_valid = NO; +#endif + } + /* + * Send variables indicating status of lookup to parent. That + * includes rehostentlen, which the parent will use as the size for + * the second read (if > 0). + */ + if (!statuses.child_errno) + statuses.child_errno = errno; + statuses.rehostentlen = rehostentlen; + IGNORE_RC(write(pfd[1], &statuses, sizeof(statuses))); + + if (rehostentlen) { + /* + * Return our resulting rehostent through pipe... + */ + IGNORE_RC(write(pfd[1], rehostent, rehostentlen)); + close(pfd[1]); + _exit(0); + } else { + /* + * ... or return error as exit code. + */ + _exit(1); + } + } +#if HAVE_SIGACTION + /* + * (parent) Restore signal mask to whatever it was before the fork. 
- + * kw + */ + sigprocmask(SIG_SETMASK, &old_sigset, NULL); +#endif /* HAVE_SIGACTION */ + + /* + * (parent) Wait until lookup finishes, or interrupt, or cycled too + * many times (just in case) -BL + */ + + close(pfd[1]); /* parent won't use write side -BL */ + + if (fpid < 0) { /* fork failed */ + close(pfd[0]); + goto failed; + } + + while (child_exited || (long) (time((time_t *) 0) - start_time) < dns_patience) { + + FD_ZERO(&readfds); + /* + * This allows us to abort immediately, not after 1-second timeout, + * when user hits abort key. Can't do this when using SLANG (or at + * least I don't know how), so SLANG users must live with up-to-1s + * timeout. -BL + * + * Whoops -- we need to make sure stdin is actually selectable! + * /dev/null isn't, on some systems, which makes some useful Lynx + * invocations fail. -BL + */ + { + int kbd_fd = LYConsoleInputFD(TRUE); + + if (kbd_fd != INVSOC) { + FD_SET(kbd_fd, &readfds); + } + } + + one_second.tv_sec = 1; + one_second.tv_usec = 0; + FD_SET(pfd[0], &readfds); + + /* + * Return when data received, interrupted, or failed. If nothing + * is waiting, we sleep for 1 second in select(), to be nice to the + * system. -BL + */ +#ifdef SOCKS + if (socks_flag) + selret = Rselect(pfd[0] + 1, &readfds, NULL, NULL, &one_second); + else +#endif /* SOCKS */ + selret = select(pfd[0] + 1, &readfds, NULL, NULL, &one_second); + + if ((selret > 0) && FD_ISSET(pfd[0], &readfds)) { + /* + * First get status, including length of address. -BL, kw + */ + readret = readit(pfd[0], (char *) &statuses, sizeof(statuses)); + if (readret == sizeof(statuses)) { + h_errno = statuses.child_h_errno; + set_errno(statuses.child_errno); +#ifdef HAVE_H_ERRNO + if (statuses.h_errno_valid) { + lynx_nsl_status = HT_H_ERRNO_VALID; + /* + * If something went wrong in the child process other + * than normal lookup errors, and it appears that we + * have enough info to know what went wrong, generate + * diagnostic output. 
ENOMEM observed on linux in + * processes constrained with ulimit. It would be too + * unkind to abort the session, access to local files + * or through a proxy may still work. - kw + */ + if ( +#ifdef NETDB_INTERNAL /* linux glibc: defined in netdb.h */ + (errno && h_errno == NETDB_INTERNAL) || +#endif + (errno == ENOMEM && + statuses.rehostentlen == 0 && + /* should probably be NETDB_INTERNAL if child + memory exhausted, but we may find that + h_errno remains unchanged. - kw */ + h_errno == -2)) { +#ifndef MULTINET + HTInetStatus("CHILD gethostbyname"); +#endif + HTAlert(LYStrerror(statuses.child_errno)); + if (errno == ENOMEM) { + /* + * Not much point in continuing, right? Fake a + * 'z', should shorten pointless guessing + * cycle. - kw + */ + LYFakeZap(YES); + } + } + } +#endif /* HAVE_H_ERRNO */ + if (statuses.rehostentlen > sizeof(LYNX_HOSTENT)) { + /* + * Then get the full reorganized hostent. -BL, kw + */ + readret = readit(pfd[0], rehostent, statuses.rehostentlen); +#ifdef DEBUG_HOSTENT + dump_hostent("Read from pipe", (LYNX_HOSTENT *) rehostent); +#endif + if (readret == statuses.rehostentlen) { + got_rehostent = 1; + result_phost = (LYNX_HOSTENT *) rehostent; + lynx_nsl_status = HT_OK; + } else if (!statuses.h_errno_valid) { + lynx_nsl_status = HT_INTERNAL; + } + } + } else { + lynx_nsl_status = HT_ERROR; + } + /* + * Make sure child is cleaned up. -BL + */ + if (!child_exited) + waitret = waitpid(fpid, &waitstat, WNOHANG); + if (!WIFEXITED(waitstat) && !WIFSIGNALED(waitstat)) { + kill(fpid, SIGTERM); + waitret = waitpid(fpid, &waitstat, WNOHANG); + } + break; + } + + /* + * Clean up if child exited before & no data received. -BL + */ + if (child_exited) { + waitret = waitpid(fpid, &waitstat, WNOHANG); + break; + } + /* + * If child exited, loop once more looking for data. 
-BL + */ + if ((waitret = waitpid(fpid, &waitstat, WNOHANG)) > 0) { + /* + * Data will be arriving right now, so make sure we don't + * short-circuit out for too many loops, and skip the interrupt + * check. -BL + */ + child_exited = 1; + continue; + } + + /* + * Abort if interrupt key pressed. + */ + if (HTCheckForInterrupt()) { + CTRACE((tfp, "LYGetHostByName: INTERRUPTED gethostbyname.\n")); + kill(fpid, SIGTERM); + waitpid(fpid, NULL, WNOHANG); + close(pfd[0]); + lynx_nsl_status = HT_INTERRUPTED; + return NULL; + } + } + close(pfd[0]); + if (waitret <= 0) { + kill(fpid, SIGTERM); + waitret = waitpid(fpid, &waitstat, WNOHANG); + } + if (waitret > 0) { + if (WIFEXITED(waitstat)) { + CTRACE((tfp, + "LYGetHostByName: NSL_FORK child %d exited, status 0x%x.\n", + (int) waitret, WEXITSTATUS(waitstat))); + } else if (WIFSIGNALED(waitstat)) { + CTRACE((tfp, + "LYGetHostByName: NSL_FORK child %d got signal, status 0x%x!\n", + (int) waitret, WTERMSIG(waitstat))); +#ifdef WCOREDUMP + if (WCOREDUMP(waitstat)) { + CTRACE((tfp, + "LYGetHostByName: NSL_FORK child %d dumped core!\n", + (int) waitret)); + } +#endif /* WCOREDUMP */ + } else if (WIFSTOPPED(waitstat)) { + CTRACE((tfp, + "LYGetHostByName: NSL_FORK child %d is stopped, status 0x%x!\n", + (int) waitret, WSTOPSIG(waitstat))); + } + } + if (!got_rehostent) { + goto failed; + } + } +#else /* Not NSL_FORK: */ + +#ifdef _WINDOWS_NSL + { + HANDLE hThread; + DWORD dwThreadID; + +#ifndef __CYGWIN__ + if (!system_is_NT) { /* for Windows9x */ + unsigned long t; + + t = (unsigned long) inet_addr(host); + if (t != INADDR_NONE) + gbl_phost = gethostbyaddr((char *) &t, sizeof(t), AF_INET); + else + gbl_phost = gethostbyname(host); + } else { /* for Windows NT */ +#endif /* !__CYGWIN__ */ + gbl_phost = (LYNX_HOSTENT *) NULL; + donelookup = FALSE; + +#if defined(__CYGWIN__) || defined(USE_WINSOCK2_H) + SetLastError(WSAHOST_NOT_FOUND); +#else + WSASetLastError(WSAHOST_NOT_FOUND); +#endif + + hThread = CreateThread(NULL, 4096UL, 
_fork_func, host, 0UL, + &dwThreadID); + if (!hThread) + MessageBox(NULL, "CreateThread", + "CreateThread Failed", 0L); + + while (!donelookup) { + if (HTCheckForInterrupt()) { + /* Note that host is a character array and is not freed */ + /* to avoid possible subthread problems: */ + if (!CloseHandle(hThread)) { + MessageBox((void *) NULL, + "CloseHandle", "CloseHandle Failed", 0L); + } + lynx_nsl_status = HT_INTERRUPTED; + return NULL; + } + } +#ifndef __CYGWIN__ + } +#endif /* !__CYGWIN__ */ + if (gbl_phost) { + lynx_nsl_status = HT_OK; + result_phost = gbl_phost; + } else { + lynx_nsl_status = HT_ERROR; + goto failed; + } + } + +#else /* !NSL_FORK, !_WINDOWS_NSL: */ + { + LYNX_HOSTENT *phost; + + phost = gethostbyname(host); /* See netdb.h */ +#ifdef MVS + CTRACE((tfp, "LYGetHostByName: gethostbyname() returned %d\n", phost)); +#endif /* MVS */ + if (phost) { + lynx_nsl_status = HT_OK; + result_phost = phost; + } else { + lynx_nsl_status = HT_H_ERRNO_VALID; + goto failed; + } + } +#endif /* !NSL_FORK, !_WINDOWS_NSL */ +#endif /* !NSL_FORK */ + +#ifdef DEBUG_HOSTENT + dump_hostent("End of LYGetHostByName", result_phost); + CTRACE((tfp, "LYGetHostByName: Resolved name to a hostent.\n")); +#endif + + return result_phost; /* OK */ + + failed: + CTRACE((tfp, "LYGetHostByName: Can't find internet node name `%s'.\n", + host)); + return NULL; +} + +/* Parse a network node address and port + * ------------------------------------- + * + * On entry, + * str points to a string with a node name or number, + * with optional trailing colon and port number. + * soc_in points to the binary internet or decnet address field. + * + * On exit, + * *soc_in is filled in. If no port is specified in str, that + * field is left unchanged in *soc_in. 
+ */ +#ifndef INET6 +static int HTParseInet(SockA * soc_in, const char *str) +{ + char *port; + int dotcount_ip = 0; /* for dotted decimal IP addr */ + char *strptr; + char *host = NULL; + + if (!str) { + CTRACE((tfp, "HTParseInet: Can't parse `NULL'.\n")); + return -1; + } + CTRACE((tfp, "HTParseInet: parsing `%s'.\n", str)); + if (HTCheckForInterrupt()) { + CTRACE((tfp, "HTParseInet: INTERRUPTED for '%s'.\n", str)); + return -1; + } + StrAllocCopy(host, str); /* Make a copy we can mutilate */ + /* + * Parse port number if present. + */ + if ((port = strchr(host, ':')) != NULL) { + *port++ = 0; /* Chop off port */ + strptr = port; + if (port[0] >= '0' && port[0] <= '9') { +#ifdef UNIX + soc_in->sin_port = (PortNumber) htons(strtol(port, &strptr, 10)); +#else /* VMS: */ +#ifdef DECNET + soc_in->sdn_objnum = (unsigned char) (strtol(port, &strptr, 10)); +#else + soc_in->sin_port = htons((PortNumber) strtol(port, &strptr, 10)); +#endif /* Decnet */ +#endif /* Unix vs. VMS */ +#ifdef SUPPRESS /* 1. crashes!?!. 2. URL syntax has number not name */ + } else { + struct servent *serv = getservbyname(port, (char *) 0); + + if (serv) { + soc_in->sin_port = serv->s_port; + } else { + CTRACE((tfp, "TCP: Unknown service %s\n", port)); + } +#endif /* SUPPRESS */ + } + if (strptr && *strptr != '\0') { + FREE(host); + HTAlwaysAlert(NULL, gettext("Address has invalid port")); + return -1; + } + } +#ifdef DECNET + /* + * Read Decnet node name. @@ Should know about DECnet addresses, but it's + * probably worth waiting until the Phase transition from IV to V. 
+ */ + soc_in->sdn_nam.n_len = min(DN_MAXNAML, strlen(host)); /* <=6 in phase 4 */ + StrNCpy(soc_in->sdn_nam.n_name, host, soc_in->sdn_nam.n_len + 1); + CTRACE((tfp, + "DECnet: Parsed address as object number %d on host %.6s...\n", + soc_in->sdn_objnum, host)); +#else /* parse Internet host: */ + + if (*host >= '0' && *host <= '9') { /* Test for numeric node address: */ + strptr = host; + while (*strptr) { + if (*strptr == '.') { + dotcount_ip++; + } else if (!isdigit(UCH(*strptr))) { + break; + } + strptr++; + } + if (*strptr) { /* found non-numeric, assume domain name */ + dotcount_ip = 0; + } + } + + /* + * Parse host number if present. + */ + if (dotcount_ip == 3) /* Numeric node address: */ + { +#ifdef DGUX_OLD + soc_in->sin_addr.s_addr = inet_addr(host).s_addr; /* See arpa/inet.h */ +#else +#ifdef GUSI + soc_in->sin_addr = inet_addr(host); /* See netinet/in.h */ +#else +#ifdef HAVE_INET_ATON + if (!inet_aton(host, &(soc_in->sin_addr))) { + CTRACE((tfp, "inet_aton(%s) returns error\n", host)); + FREE(host); + return -1; + } +#else + soc_in->sin_addr.s_addr = inet_addr(host); /* See arpa/inet.h */ +#endif /* HAVE_INET_ATON */ +#endif /* GUSI */ +#endif /* DGUX_OLD */ + FREE(host); + } else { /* Alphanumeric node name: */ + +#ifdef MVS /* Outstanding problem with crash in MVS gethostbyname */ + CTRACE((tfp, "HTParseInet: Calling LYGetHostByName(%s)\n", host)); +#endif /* MVS */ + +#ifdef _WINDOWS_NSL + gbl_phost = LYGetHostByName(host); /* See above */ + if (!gbl_phost) + goto failed; + MemCpy((void *) &soc_in->sin_addr, gbl_phost->h_addr_list[0], gbl_phost->h_length); +#else /* !_WINDOWS_NSL */ + { + LYNX_HOSTENT *phost; + + phost = LYGetHostByName(host); /* See above */ + + if (!phost) + goto failed; + if (!phost) + goto failed; + if (phost->h_length != sizeof soc_in->sin_addr) { + HTAlwaysAlert(host, gettext("Address length looks invalid")); + } + MemCpy((void *) &soc_in->sin_addr, phost->h_addr_list[0], phost->h_length); + } +#endif /* _WINDOWS_NSL */ + + 
FREE(host); + } /* Alphanumeric node name */ + + CTRACE((tfp, + "HTParseInet: Parsed address as port %d, IP address %d.%d.%d.%d\n", + (int) ntohs(soc_in->sin_port), + (int) *((unsigned char *) (&soc_in->sin_addr) + 0), + (int) *((unsigned char *) (&soc_in->sin_addr) + 1), + (int) *((unsigned char *) (&soc_in->sin_addr) + 2), + (int) *((unsigned char *) (&soc_in->sin_addr) + 3))); +#endif /* Internet vs. Decnet */ + + return 0; /* OK */ + + failed: + CTRACE((tfp, "HTParseInet: Can't find internet node name `%s'.\n", + host)); + FREE(host); + switch (lynx_nsl_status) { + case HT_NOT_ACCEPTABLE: + case HT_INTERRUPTED: + return lynx_nsl_status; + default: + return -1; + } +} +#endif /* !INET6 */ + +#ifdef INET6 +static LYNX_ADDRINFO *HTGetAddrInfo(const char *str, + const int defport) +{ + LYNX_ADDRINFO hints, *res; + int error; + char *p; + char *s = NULL; + char *host, *port; + char pbuf[80]; + + StrAllocCopy(s, str); + + if (s[0] == '[' && (p = strchr(s, ']')) != NULL) { + *p++ = '\0'; + host = s + 1; + } else { + p = s; + host = &s[0]; + } + port = strrchr(p, ':'); + if (port) { + *port++ = '\0'; + } else { + sprintf(pbuf, "%d", defport); + port = pbuf; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(host, port, &hints, &res); + if (error || !res) { + CTRACE((tfp, "HTGetAddrInfo: getaddrinfo(%s, %s): %s\n", host, port, + gai_strerror(error))); + res = NULL; + } + + free(s); + return res; +} +#endif /* INET6 */ + +#ifdef LY_FIND_LEAKS +/* Free our name for the host on which we are - FM + * ------------------------------------------- + * + */ +static void free_HTTCP_hostname(void) +{ + FREE(hostname); +} +#endif /* LY_FIND_LEAKS */ + +/* Derive the name of the host on which we are + * ------------------------------------------- + * + */ +static void get_host_details(void) +{ + char name[MAXHOSTNAMELEN + 1]; /* The name of this host */ + +#ifdef UCX + char *domain_name; /* The name of 
this host domain */ +#endif /* UCX */ +#ifdef NEED_HOST_ADDRESS /* no -- needs name server! */ +#ifdef INET6 + LYNX_ADDRINFO hints, *res; + int error; + +#else + LYNX_HOSTENT *phost; /* Pointer to host -- See netdb.h */ +#endif /* INET6 */ +#endif /* NEED_HOST_ADDRESS */ + size_t namelength = sizeof(name); + + if (hostname) + return; /* Already done */ + gethostname(name, namelength); /* Without domain */ + StrAllocCopy(hostname, name); +#ifdef LY_FIND_LEAKS + atexit(free_HTTCP_hostname); +#endif +#ifdef UCX + /* + * UCX doesn't give the complete domain name. Get rest from UCX$BIND_DOM + * logical. + */ + if (strchr(hostname, '.') == NULL) { /* Not full address */ + domain_name = LYGetEnv("UCX$BIND_DOMAIN"); + if (domain_name == NULL) + domain_name = LYGetEnv("TCPIP$BIND_DOMAIN"); + if (domain_name != NULL) { + StrAllocCat(hostname, "."); + StrAllocCat(hostname, domain_name); + } + } +#endif /* UCX */ + CTRACE((tfp, "TCP: Local host name is %s\n", hostname)); + +#ifndef DECNET /* Decnet ain't got no damn name server 8#OO */ +#ifdef NEED_HOST_ADDRESS /* no -- needs name server! */ +#ifdef INET6 + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_CANONNAME; + error = getaddrinfo(name, NULL, &hints, &res); + if (error || !res || !res->ai_canonname) { + CTRACE((tfp, "TCP: %s: `%s'\n", gai_strerror(error), name)); + if (res) + freeaddrinfo(res); + return; /* Fail! */ + } + StrAllocCopy(hostname, res->ai_canonname); + MemCpy(&HTHostAddress, res->ai_addr, res->ai_addrlen); + freeaddrinfo(res); +#else + phost = gethostbyname(name); /* See netdb.h */ + if (!OK_HOST(phost)) { + CTRACE((tfp, + "TCP: Can't find my own internet node address for `%s'!!\n", + name)); + return; /* Fail! 
*/ + } + StrAllocCopy(hostname, phost->h_name); + MemCpy(&HTHostAddress, &phost->h_addr_list[0], phost->h_length); +#endif /* INET6 */ + CTRACE((tfp, " Name server says that I am `%s' = %s\n", + hostname, HTInetString(&HTHostAddress))); +#endif /* NEED_HOST_ADDRESS */ + +#endif /* !DECNET */ +} + +const char *HTHostName(void) +{ + get_host_details(); + return hostname; +} + +#ifdef _WINDOWS +#define SET_EINTR WSASetLastError(EINTR) +#else +#define SET_EINTR SOCKET_ERRNO = EINTR +#endif + +static BOOL HTWasInterrupted(int *status) +{ + BOOL result = FALSE; + + if (HTCheckForInterrupt()) { + result = TRUE; + *status = HT_INTERRUPTED; + SET_EINTR; + } + return result; +} + +#define TRIES_PER_SECOND 10 + +/* + * Set the select-timeout to 0.1 seconds. + */ +static void set_timeout(struct timeval *timeoutp) +{ + timeoutp->tv_sec = 0; + timeoutp->tv_usec = 100000; +} + +#ifndef MULTINET /* SOCKET_ERRNO != errno ? */ +#if !defined(UCX) || !defined(VAXC) /* errno not modifiable ? */ +#define SOCKET_DEBUG_TRACE /* show errno status after some system calls */ +#endif /* UCX && VAXC */ +#endif /* MULTINET */ +/* + * Interruptible connect as implemented for Mosaic by Marc Andreesen + * and hacked in for Lynx years ago by Lou Montulli, and further + * modified over the years by numerous Lynx lovers. - FM + */ +int HTDoConnect(const char *url, + const char *protocol, + int default_port, + int *s) +{ + int status = 0; + char *line = NULL; + char *p1 = NULL; + char *at_sign = NULL; + char *host = NULL; + +#ifdef INET6 + LYNX_ADDRINFO *res = 0, *res0 = 0; + +#else + struct sockaddr_in soc_address; + struct sockaddr_in *soc_in = &soc_address; + + /* + * Set up defaults. + */ + memset(soc_in, 0, sizeof(*soc_in)); + soc_in->sin_family = AF_INET; + soc_in->sin_port = htons((PortNumber) default_port); +#endif /* INET6 */ + + /* + * Get node name and optional port number. 
+ */ + p1 = HTParse(url, "", PARSE_HOST); + if ((at_sign = strchr(p1, '@')) != NULL) { + /* + * If there's an @ then use the stuff after it as a hostname. + */ + StrAllocCopy(host, (at_sign + 1)); + } else { + StrAllocCopy(host, p1); + } + FREE(p1); + + HTSprintf0(&line, "%s%s", WWW_FIND_MESSAGE, host); + _HTProgress(line); +#ifdef INET6 + /* HTParseInet() is useless! */ + res0 = HTGetAddrInfo(host, default_port); + if (res0 == NULL) { + HTSprintf0(&line, gettext("Unable to locate remote host %s."), host); + _HTProgress(line); + FREE(host); + FREE(line); + return HT_NO_DATA; + } +#else + status = HTParseInet(soc_in, host); + if (status) { + if (status != HT_INTERRUPTED) { + if (status == HT_NOT_ACCEPTABLE) { + /* Not HTProgress, so warning won't be overwritten immediately; + * but not HTAlert, because typically there will be other + * alerts from the callers. - kw + */ + HTUserMsg2(gettext("Invalid hostname %s"), host); + } else { + HTSprintf0(&line, + gettext("Unable to locate remote host %s."), host); + _HTProgress(line); + } + status = HT_NO_DATA; + } + FREE(host); + FREE(line); + return status; + } +#endif /* INET6 */ + + HTSprintf0(&line, gettext("Making %s connection to %s"), protocol, host); + _HTProgress(line); + FREE(host); + FREE(line); + + /* + * Now, let's get a socket set up from the server for the data. 
+ */ +#ifndef INET6 + *s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (*s == -1) { + HTAlert(gettext("socket failed.")); + return HT_NO_DATA; + } +#else + for (res = res0; res; res = res->ai_next) { + *s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); + if (*s == -1) { + char hostbuf[1024], portbuf[1024]; + + getnameinfo(res->ai_addr, res->ai_addrlen, + hostbuf, (socklen_t) sizeof(hostbuf), + portbuf, (socklen_t) sizeof(portbuf), + NI_NUMERICHOST | NI_NUMERICSERV); + HTSprintf0(&line, + gettext("socket failed: family %d addr %s port %s."), + res->ai_family, hostbuf, portbuf); + _HTProgress(line); + FREE(line); + continue; + } +#endif /* INET6 */ + +#if !defined(DOSPATH) || defined(__DJGPP__) +#if !defined(NO_IOCTL) || defined(USE_FCNTL) + /* + * Make the socket non-blocking, so the connect can be canceled. This + * means that when we issue the connect we should NOT have to wait for + * the accept on the other end. + */ + { +#ifdef USE_FCNTL + int ret = fcntl(*s, F_SETFL, O_NONBLOCK); + +#else + int val = 1; + int ret = IOCTL(*s, FIONBIO, &val); +#endif /* USE_FCNTL */ + if (ret == -1) + _HTProgress(gettext("Could not make connection non-blocking.")); + } +#endif /* !NO_IOCTL || USE_FCNTL */ +#endif /* !DOSPATH || __DJGPP__ */ + + /* + * Issue the connect. Since the server can't do an instantaneous + * accept and we are non-blocking, this will almost certainly return a + * negative status. 
+ */ +#ifdef SOCKS + if (socks_flag) { +#ifdef INET6 + status = Rconnect(*s, res->ai_addr, res->ai_addrlen); +#else + status = Rconnect(*s, (struct sockaddr *) &soc_address, + sizeof(soc_address)); +#ifndef SHORTENED_RBIND + socks_bind_remoteAddr = soc_address.sin_addr.s_addr; +#endif +#endif /* INET6 */ + } else +#endif /* SOCKS */ +#ifdef INET6 + status = connect(*s, res->ai_addr, res->ai_addrlen); +#else + status = connect(*s, (struct sockaddr *) &soc_address, sizeof(soc_address)); +#endif /* INET6 */ + + /* + * According to the Sun man page for connect: + * EINPROGRESS The socket is non-blocking and the con- + * nection cannot be completed immediately. + * It is possible to select(2) for comple- + * tion by selecting the socket for writ- + * ing. + * According to the Motorola SVR4 man page for connect: + * EAGAIN The socket is non-blocking and the con- + * nection cannot be completed immediately. + * It is possible to select for completion + * by selecting the socket for writing. + * However, this is only possible if the + * socket STREAMS module is the topmost + * module on the protocol stack with a + * write service procedure. This will be + * the normal case. + */ + if ((status < 0) && + (SOCKET_ERRNO == EINPROGRESS +#ifdef EAGAIN + || SOCKET_ERRNO == EAGAIN +#endif + )) { + struct timeval select_timeout; + int ret; + int tries = 0; + +#ifdef SOCKET_DEBUG_TRACE + HTInetStatus("this socket's first connect"); +#endif /* SOCKET_DEBUG_TRACE */ + ret = 0; + while (ret <= 0) { + fd_set writefds; + + /* + * Protect against an infinite loop. 
+ */ + if ((tries++ / TRIES_PER_SECOND) >= connect_timeout) { + HTAlert(gettext("Connection failed (too many retries).")); +#ifdef INET6 + FREE(line); + if (res0) + freeaddrinfo(res0); +#endif /* INET6 */ + return HT_NO_DATA; + } + set_timeout(&select_timeout); + FD_ZERO(&writefds); + FD_SET((unsigned) *s, &writefds); +#ifdef SOCKS + if (socks_flag) + ret = Rselect(*s + 1, NULL, + &writefds, NULL, &select_timeout); + else +#endif /* SOCKS */ + ret = select(*s + 1, + NULL, + &writefds, + NULL, + &select_timeout); + +#ifdef SOCKET_DEBUG_TRACE + if (tries == 1) { + HTInetStatus("this socket's first select"); + } +#endif /* SOCKET_DEBUG_TRACE */ + /* + * If we suspend, then it is possible that select will be + * interrupted. Allow for this possibility. - JED + */ + if ((ret == -1) && (errno == EINTR)) + continue; + +#ifdef SOCKET_DEBUG_TRACE + if (ret < 0) { + HTInetStatus("failed select"); + } +#endif /* SOCKET_DEBUG_TRACE */ + /* + * Again according to the Sun and Motorola man pages for + * connect: + * EALREADY The socket is non-blocking and a previ- + * ous connection attempt has not yet been + * completed. + * Thus if the SOCKET_ERRNO is NOT EALREADY we have a real + * error, and should break out here and return that error. + * Otherwise if it is EALREADY keep on trying to complete the + * connection. + */ + if ((ret < 0) && (SOCKET_ERRNO != EALREADY)) { + status = ret; + break; + } else if (ret > 0) { + /* + * Extra check here for connection success, if we try to + * connect again, and get EISCONN, it means we have a + * successful connection. But don't check with SOCKS. + */ +#ifdef SOCKS + if (socks_flag) { + status = 0; + } else { +#endif /* SOCKS */ +#ifdef INET6 + status = connect(*s, res->ai_addr, res->ai_addrlen); +#else + status = connect(*s, (struct sockaddr *) &soc_address, + sizeof(soc_address)); +#endif /* INET6 */ +#ifdef UCX + /* + * A UCX feature: Instead of returning EISCONN UCX + * returns EADDRINUSE. Test for this status also. 
+ */ + if ((status < 0) && ((SOCKET_ERRNO == EISCONN) || + (SOCKET_ERRNO == EADDRINUSE))) +#else + if ((status < 0) && (SOCKET_ERRNO == EISCONN)) +#endif /* UCX */ + { + status = 0; + } + + if (status && (SOCKET_ERRNO == EALREADY)) /* new stuff LJM */ + ret = 0; /* keep going */ + else { +#ifdef SOCKET_DEBUG_TRACE + if (status < 0) { + HTInetStatus("confirm-ready connect"); + } +#endif /* SOCKET_DEBUG_TRACE */ + break; + } +#ifdef SOCKS + } +#endif /* SOCKS */ + } +#ifdef SOCKS + else if (!socks_flag) +#else + else +#endif /* SOCKS */ + { + /* + * The select says we aren't ready yet. Try to connect + * again to make sure. If we don't get EALREADY or + * EISCONN, something has gone wrong. Break out and report + * it. + * + * For some reason, SVR4 returns EAGAIN here instead of + * EALREADY, even though the man page says it should be + * EALREADY. + * + * For some reason, UCX pre 3 apparently returns errno = + * 18242 instead of EALREADY or EISCONN. + */ +#ifdef INET6 + status = connect(*s, res->ai_addr, res->ai_addrlen); +#else + status = connect(*s, (struct sockaddr *) &soc_address, + sizeof(soc_address)); +#endif /* INET6 */ + if ((status < 0) && + (SOCKET_ERRNO != EALREADY +#ifdef EAGAIN + && SOCKET_ERRNO != EAGAIN +#endif + ) && +#ifdef UCX + (SOCKET_ERRNO != 18242) && +#endif /* UCX */ + (SOCKET_ERRNO != EISCONN)) { +#ifdef SOCKET_DEBUG_TRACE + HTInetStatus("confirm-not-ready connect"); +#endif /* SOCKET_DEBUG_TRACE */ + break; + } + } + if (HTWasInterrupted(&status)) { + CTRACE((tfp, "*** INTERRUPTED in middle of connect.\n")); + break; + } + } + } +#ifdef SOCKET_DEBUG_TRACE + else if (status < 0) { + HTInetStatus("this socket's first and only connect"); + } +#endif /* SOCKET_DEBUG_TRACE */ +#ifdef INET6 + if (status < 0) { + NETCLOSE(*s); + *s = -1; + continue; + } + break; + } +#endif /* INET6 */ + +#ifdef INET6 + if (*s < 0) +#else + if (status < 0) +#endif /* INET6 */ + { + /* + * The connect attempt failed or was interrupted, so close up the + * socket. 
+ */ + NETCLOSE(*s); + } +#if !defined(DOSPATH) || defined(__DJGPP__) +#if !defined(NO_IOCTL) || defined(USE_FCNTL) + else { + /* + * Make the socket blocking again on good connect. + */ +#ifdef USE_FCNTL + int ret = fcntl(*s, F_SETFL, 0); + +#else + int val = 0; + int ret = IOCTL(*s, FIONBIO, &val); +#endif /* USE_FCNTL */ + if (ret == -1) + _HTProgress(gettext("Could not restore socket to blocking.")); + } +#endif /* !NO_IOCTL || USE_FCNTL */ +#endif /* !DOSPATH || __DJGPP__ */ + +#ifdef INET6 + FREE(line); + if (res0) + freeaddrinfo(res0); +#endif /* INET6 */ + return status; +} + +/* + * This is so interruptible reads can be implemented cleanly. + */ +int HTDoRead(int fildes, + void *buf, + unsigned nbyte) +{ + int result; + BOOL ready; + +#if !defined(NO_IOCTL) + int ret; + fd_set readfds; + struct timeval select_timeout; + int tries = 0; + +#ifdef USE_READPROGRESS + int otries = 0; + time_t otime = time((time_t *) 0); + time_t start = otime; +#endif +#endif /* !NO_IOCTL */ + +#if defined(UNIX) && !defined(__BEOS__) + if (fildes == 0) { + /* + * 0 can be a valid socket fd, but if it's a tty something must have + * gone wrong. - kw + */ + if (isatty(fildes)) { + CTRACE((tfp, "HTDoRead - refusing to read fd 0 which is a tty!\n")); + return -1; + } + } else +#endif + if (fildes <= 0) { + CTRACE((tfp, "HTDoRead - no file descriptor!\n")); + return -1; + } + + if (HTWasInterrupted(&result)) { + CTRACE((tfp, "HTDoRead - interrupted before starting!\n")); + return (result); + } +#if defined(NO_IOCTL) + ready = TRUE; +#else + ready = FALSE; + while (!ready) { + /* + * Protect against an infinite loop. 
+ */ + if ((tries++ / TRIES_PER_SECOND) >= reading_timeout) { + HTAlert(gettext("Socket read failed (too many tries).")); + SET_EINTR; + result = HT_INTERRUPTED; + break; + } +#ifdef USE_READPROGRESS + if (tries - otries > TRIES_PER_SECOND) { + time_t t = time((time_t *) 0); + + otries = tries; + if (t - otime >= 5) { + otime = t; + HTReadProgress((off_t) (-1), (off_t) 0); /* Put "stalled" message */ + } + } +#endif + + /* + * If we suspend, then it is possible that select will be interrupted. + * Allow for this possibility. - JED + */ + do { + set_timeout(&select_timeout); + FD_ZERO(&readfds); + FD_SET((unsigned) fildes, &readfds); +#ifdef SOCKS + if (socks_flag) + ret = Rselect(fildes + 1, + &readfds, NULL, NULL, &select_timeout); + else +#endif /* SOCKS */ + ret = select(fildes + 1, + &readfds, NULL, NULL, &select_timeout); + } while ((ret == -1) && (errno == EINTR)); + + if (ret < 0) { + result = -1; + break; + } else if (ret > 0) { + ready = TRUE; + } else if (HTWasInterrupted(&result)) { + break; + } + } +#endif /* !NO_IOCTL */ + + if (ready) { +#if defined(UCX) && defined(VAXC) + /* + * VAXC and UCX problem only. + */ + errno = vaxc$errno = 0; + result = SOCKET_READ(fildes, buf, nbyte); + CTRACE((tfp, + "Read - result,errno,vaxc$errno: %d %d %d\n", result, errno, vaxc$errno)); + if ((result <= 0) && TRACE) + perror("HTTCP.C:HTDoRead:read"); /* RJF */ + /* + * An errno value of EPIPE and result < 0 indicates end-of-file on VAXC. 
+ */ + if ((result <= 0) && (errno == EPIPE)) { + result = 0; + set_errno(0); + } +#else +#ifdef UNIX + while ((result = (int) SOCKET_READ(fildes, buf, nbyte)) == -1) { + if (errno == EINTR) + continue; +#ifdef ERESTARTSYS + if (errno == ERESTARTSYS) + continue; +#endif /* ERESTARTSYS */ + HTInetStatus("read"); + break; + } +#else /* UNIX */ + result = SOCKET_READ(fildes, buf, nbyte); +#endif /* !UNIX */ +#endif /* UCX && VAXC */ + } +#ifdef USE_READPROGRESS + CTRACE2(TRACE_TIMING, (tfp, "...HTDoRead returns %d (%" PRI_time_t + " seconds)\n", + result, CAST_time_t (time((time_t *)0) - start))); +#endif + return result; +} + +#ifdef SVR4_BSDSELECT +/* + * This is a fix for the difference between BSD's select() and + * SVR4's select(). SVR4's select() can never return a value larger + * than the total number of file descriptors being checked. So, if + * you select for read and write on one file descriptor, and both + * are true, SVR4 select() will only return 1. BSD select in the + * same situation will return 2. + * + * Additionally, BSD select() on timing out, will zero the masks, + * while SVR4 does not. This is fixed here as well. + * + * Set your tabstops to 4 characters to have this code nicely formatted. 
#ifdef select
#undef select
#endif /* select */

#ifdef SOCKS
#ifdef Rselect
#undef Rselect
#endif /* Rselect */
#endif /* SOCKS */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/select.h>

/*
 * select() wrapper giving BSD-style results on SVR4-style systems:
 *
 *   - on timeout (0) every non-NULL mask is cleared;
 *   - on success the return value is the total number of bits left set
 *     across the three masks, counting descriptors 0 .. nfds-1;
 *   - a -1 error return is passed through untouched.
 */
int BSDselect(int nfds,
              fd_set * readfds,
              fd_set * writefds,
              fd_set * exceptfds,
              struct timeval *select_timeout)
{
    int status;
    int fd;
    int bits = 0;

#ifdef SOCKS
    if (socks_flag)
        status = Rselect(nfds, readfds, writefds, exceptfds, select_timeout);
    else
#endif /* SOCKS */
        status = select(nfds, readfds, writefds, exceptfds, select_timeout);

    if (status == -1)
        return status;

    if (status == 0) {
        /* Timed out: make the masks look the way BSD would leave them. */
        if (readfds != NULL)
            FD_ZERO(readfds);
        if (writefds != NULL)
            FD_ZERO(writefds);
        if (exceptfds != NULL)
            FD_ZERO(exceptfds);
        return status;
    }

    /* Count every ready condition, not just every ready descriptor. */
    for (fd = 0; fd < nfds; fd++) {
        if ((readfds != NULL) && FD_ISSET(fd, readfds))
            bits++;
        if ((writefds != NULL) && FD_ISSET(fd, writefds))
            bits++;
        if ((exceptfds != NULL) && FD_ISSET(fd, exceptfds))
            bits++;
    }
    return bits;
}
+ */ +#ifdef INET6 + extern const char *HTInetString(SockA * mysin); + +#else + extern const char *HTInetString(struct sockaddr_in *mysin); +#endif /* INET6 */ + +/* Encode INET status (as in sys/errno.h) inet_status() + * ------------------ + * + * On entry: + * where gives a description of what caused the error + * global errno gives the error number in the unix way. + * + * On return: + * returns a negative status in the unix way. + */ + extern int HTInetStatus(const char *where); + +/* Publicly accessible variables +*/ +/* extern struct sockaddr_in HTHostAddress; */ + /* The internet address of the host */ + /* Valid after call to HTHostName() */ + +/* Parse a cardinal value parse_cardinal() + * ---------------------- + * + * On entry: + * *pp points to first character to be interpreted, terminated by + * non 0..9 character. + * *pstatus points to status already valid, + * maxvalue gives the largest allowable value. + * + * On exit: + * *pp points to first unread character, + * *pstatus points to status updated iff bad + */ + + extern unsigned int HTCardinal(int *pstatus, + char **pp, + unsigned int max_value); + +/* Check whether string is a valid Internet hostname + * ------------------------------------------------- + */ + + extern BOOL valid_hostname(char *name); + +/* Resolve an internet hostname, like gethostbyname + * ------------------------------------------------ + * + * On entry, + * str points to the given host name, not numeric address, + * without colon or port number. + * + * On exit, + * returns a pointer to a struct hostent in static storage, + * or NULL in case of error or user interruption. + * + * The interface is intended to be the same as for gethostbyname(), + * but additional status is returned in lynx_nsl_status. 
+ */ + extern int lynx_nsl_status; + + extern struct hostent *LYGetHostByName(char *str); + +/* Get Name of This Machine + * ------------------------ + * + */ + + extern const char *HTHostName(void); + + extern int HTDoConnect(const char *url, + const char *protocol, + int default_port, + int *s); + + extern int HTDoRead(int fildes, + void *buf, + unsigned nbyte); + +#ifdef __cplusplus +} +#endif +#endif /* HTTCP_H */ diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c new file mode 100644 index 00000000..86398e71 --- /dev/null +++ b/WWW/Library/Implementation/HTTP.c @@ -0,0 +1,2522 @@ +/* + * $LynxId: HTTP.c,v 1.123 2011/06/11 12:09:21 tom Exp $ + * + * HyperText Tranfer Protocol - Client implementation HTTP.c + * ========================== + * Modified: + * 27 Jan 1994 PDM Added Ari Luotonen's Fix for Reload when using proxy + * servers. + * 28 Apr 1997 AJL,FM Do Proxy Authorisation. + */ + +#include <HTUtils.h> +#include <HTTP.h> +#include <LYUtils.h> + +#ifdef USE_SSL +#include <HTNews.h> +#endif + +#define HTTP_VERSION "HTTP/1.0" + +#define HTTP_PORT 80 +#define HTTPS_PORT 443 +#define SNEWS_PORT 563 + +#define INIT_LINE_SIZE 1536 /* Start with line buffer this big */ +#define LINE_EXTEND_THRESH 256 /* Minimum read size */ +#define VERSION_LENGTH 20 /* for returned protocol version */ + +#include <HTParse.h> +#include <HTTCP.h> +#include <HTFormat.h> +#include <HTFile.h> +#include <HTAlert.h> +#include <HTMIME.h> +#include <HTML.h> +#include <HTInit.h> +#include <HTAABrow.h> +#include <HTAccess.h> /* Are we using an HTTP gateway? 
#ifdef USE_SSL
SSL_CTX *ssl_ctx = NULL;        /* SSL ctx */
SSL *SSL_handle = NULL;
static int ssl_okay;

/*
 * atexit() handler: release the shared SSL context.
 */
static void free_ssl_ctx(void)
{
    if (ssl_ctx != NULL) {
        SSL_CTX_free(ssl_ctx);
        ssl_ctx = NULL;
    }
}

/*
 * Certificate verification callback installed by HTGetSSLHandle().
 *
 * Returns 1 to accept, 0 to reject.  When verification failed and the
 * user has neither accepted already (ssl_okay) nor disabled prompting
 * (ssl_noprompt), the user is asked via HTForcedPrompt().
 *
 * NOTE(review): without USE_X509_SUPPORT no prompt is issued and result
 * stays 1, so a failed preverify is accepted - confirm that is intended.
 */
static int HTSSLCallback(int preverify_ok, X509_STORE_CTX * x509_ctx GCC_UNUSED)
{
    char *msg = NULL;
    int result = 1;

#ifdef USE_X509_SUPPORT
    HTSprintf0(&msg,
               gettext("SSL callback:%s, preverify_ok=%d, ssl_okay=%d"),
               X509_verify_cert_error_string((long) X509_STORE_CTX_get_error(x509_ctx)),
               preverify_ok, ssl_okay);
    _HTProgress(msg);
    FREE(msg);
#endif

#ifndef USE_NSS_COMPAT_INCL
    if (!(preverify_ok || ssl_okay || ssl_noprompt)) {
#ifdef USE_X509_SUPPORT
        HTSprintf0(&msg, SSL_FORCED_PROMPT,
                   X509_verify_cert_error_string((long)
                                                 X509_STORE_CTX_get_error(x509_ctx)));
        if (HTForcedPrompt(ssl_noprompt, msg, YES))
            ssl_okay = 1;
        else
            result = 0;
#endif

        FREE(msg);
    }
#endif
    return result;
}

/*
 * Return a new SSL handle, creating the shared context on first use.
 *
 * Returns NULL when the context or the handle cannot be created.  A
 * failed context creation is retried on the next call.
 */
SSL *HTGetSSLHandle(void)
{
#ifdef USE_GNUTLS_INCL
    static char *certfile = NULL;
#endif

    if (ssl_ctx == NULL) {
        /*
         * First time only.
         */
#if SSLEAY_VERSION_NUMBER < 0x0800
        ssl_ctx = SSL_CTX_new();
#else
        SSLeay_add_ssl_algorithms();
        ssl_ctx = SSL_CTX_new(SSLv23_client_method());
#endif /* SSLEAY_VERSION_NUMBER < 0x0800 */
        if (ssl_ctx == NULL) {
            /* Everything below dereferences or configures the context. */
            CTRACE((tfp, "HTGetSSLHandle: SSL_CTX_new failed\n"));
            return NULL;
        }
#if SSLEAY_VERSION_NUMBER < 0x0800
        X509_set_default_verify_paths(ssl_ctx->cert);
#else
        SSL_CTX_set_options(ssl_ctx, SSL_OP_ALL);
        SSL_CTX_set_default_verify_paths(ssl_ctx);
        SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_PEER, HTSSLCallback);
#endif /* SSLEAY_VERSION_NUMBER < 0x0800 */
#if defined(USE_PROGRAM_DIR) && !defined(USE_GNUTLS_INCL)
        {
            X509_LOOKUP *lookup;

            lookup = X509_STORE_add_lookup(ssl_ctx->cert_store,
                                           X509_LOOKUP_file());
            if (lookup != NULL) {
                char *certfile = NULL;

                HTSprintf0(&certfile, "%s\\cert.pem", program_dir);
                X509_LOOKUP_load_file(lookup, certfile, X509_FILETYPE_PEM);
                FREE(certfile);
            }
        }
#endif
#ifdef USE_GNUTLS_INCL
        if ((certfile = LYGetEnv("SSL_CERT_FILE")) != NULL) {
            CTRACE((tfp,
                    "HTGetSSLHandle: certfile is set to %s by SSL_CERT_FILE\n",
                    certfile));
        } else {
            if (non_empty(SSL_cert_file)) {
                certfile = SSL_cert_file;
                CTRACE((tfp,
                        "HTGetSSLHandle: certfile is set to %s by config SSL_CERT_FILE\n",
                        certfile));
            }
#if defined(USE_PROGRAM_DIR)
            else {
                HTSprintf0(&(certfile), "%s\\cert.pem", program_dir);
                CTRACE((tfp,
                        "HTGetSSLHandle: certfile is set to %s by installed directory\n", certfile));
            }
#endif
        }
#endif
        atexit(free_ssl_ctx);
    }
#ifdef USE_GNUTLS_INCL
    ssl_ctx->certfile = certfile;
    ssl_ctx->certfile_type = GNUTLS_X509_FMT_PEM;
#endif
    ssl_okay = 0;
    return (SSL_new(ssl_ctx));
}

/*
 * Seed the SSL library's PRNG if it reports that it is not yet seeded:
 * from the library's random file when one can be named, then from the
 * time and process/thread id, then repeatedly from lynx_rand() until
 * RAND_status() is satisfied.  The random file is rewritten afterwards.
 */
void HTSSLInitPRNG(void)
{
#if SSLEAY_VERSION_NUMBER >= 0x00905100
    if (RAND_status() == 0) {
        char rand_file[256];
        const char *seed_path;
        time_t t;
        long l, seed;

#ifndef _WINDOWS
        pid_t pid;

#else
        DWORD pid;
#endif

        t = time(NULL);

#ifndef _WINDOWS
        pid = getpid();
#else
        pid = GetCurrentThreadId();
#endif

        /*
         * The buffer is an array and so never NULL; the return value is
         * what reports whether a file name could be determined.
         */
        seed_path = RAND_file_name(rand_file, sizeof(rand_file));
        CTRACE((tfp, "HTTP: Seeding PRNG\n"));
        if (seed_path != NULL) {
            /* Seed as much as 1024 bytes from RAND_file_name */
            RAND_load_file(seed_path, 1024L);
        }
        /* Seed in time (mod_ssl does this) */
        RAND_seed((unsigned char *) &t, (int) sizeof(time_t));

        /* Seed in pid (mod_ssl does this) */
        RAND_seed((unsigned char *) &pid, (int) sizeof(pid));
        /*
         * Initialize system's random number generator.  RAND_bytes() can
         * fail while the PRNG is still unseeded; fall back to time and
         * pid rather than reading an unset value.
         */
        if (RAND_bytes((unsigned char *) &seed, (int) sizeof(long)) != 1)
            seed = (long) t ^ (long) pid;

        lynx_srand((unsigned) seed);
        while (RAND_status() == 0) {
            /* Repeatedly seed the PRNG using the system's random number generator until it has been seeded with enough data */
            l = lynx_rand();
            RAND_seed((unsigned char *) &l, (int) sizeof(long));
        }
        if (seed_path != NULL) {
            /* Write a rand_file */
            RAND_write_file(seed_path);
        }
    }
#endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */
    return;
}

#define HTTP_NETREAD(sock, buff, size, handle) \
	(handle \
	 ? SSL_read(handle, buff, size) \
	 : NETREAD(sock, buff, size))

#define HTTP_NETWRITE(sock, buff, size, handle) \
	(handle \
	 ? SSL_write(handle, buff, size) \
	 : NETWRITE(sock, buff, size))

#define HTTP_NETCLOSE(sock, handle)  \
	{ (void)NETCLOSE(sock); \
	  if (handle) \
	      SSL_free(handle); \
	  SSL_handle = handle = NULL; \
	}

#else
#define HTTP_NETREAD(a, b, c, d)   NETREAD(a, b, c)
#define HTTP_NETWRITE(a, b, c, d)  NETWRITE(a, b, c)
#define HTTP_NETCLOSE(a, b)  (void)NETCLOSE(a)
#endif /* USE_SSL */
*/ + +static int ws_read(int fd, char *buf, int len) +{ + int res; + int retry = 3; + + do { + res = recv(fd, buf, len, 0); + if (WSAEWOULDBLOCK == WSAGetLastError()) { + Sleep(100); + if (retry-- > 0) + continue; + } + } while (res == SOCKET_ERROR && SOCKET_ERRNO == EINTR); + + return res; +} + +#define DWORD_ERR ((DWORD)-1) + +static DWORD __stdcall _thread_func(void *p) +{ + DWORD result; + int i, val; + recv_data_t *q = (recv_data_t *) p; + + i = 0; + i++; + val = ws_read(q->fd, q->buf, q->len); + + if (val == SOCKET_ERROR) { + ws_errno = WSAGetLastError(); +#if 0 + char buff[256]; + + sprintf(buff, "Thread read: %d, error (%ld), fd = %d, len = %d", + i, ws_errno, q->fd, q->len); + MessageBox(NULL, buff, BOX_TITLE, BOX_FLAG); +#endif + result = DWORD_ERR; + } else { + result = val; + } + + return result; +} + +/* The same like read, but takes care of EINTR and uses select to + timeout the stale connections. */ + +int ws_netread(int fd, char *buf, int len) +{ + int i; + char buff[256]; + + /* 1998/03/30 (Mon) 09:01:21 */ + HANDLE hThread; + DWORD dwThreadID; + DWORD exitcode = 0; + DWORD ret_val = DWORD_ERR; + DWORD val, process_time, now_TickCount, save_TickCount; + + static recv_data_t para; + +#define TICK 5 +#define STACK_SIZE 0x2000uL + + InitializeCriticalSection(&critSec_READ); + + para.fd = fd; + para.buf = buf; + para.len = len; + + ws_read_per_sec = 0; + save_TickCount = GetTickCount(); + + hThread = CreateThread(NULL, STACK_SIZE, + _thread_func, + (void *) ¶, 0UL, &dwThreadID); + + if (hThread == 0) { + HTInfoMsg("CreateThread Failed (read)"); + goto read_exit; + } + + i = 0; + while (1) { + val = WaitForSingleObject(hThread, 1000 / TICK); + i++; + if (val == WAIT_FAILED) { + HTInfoMsg("Wait Failed"); + ret_val = DWORD_ERR; + break; + } else if (val == WAIT_TIMEOUT) { + i++; + if (i / TICK > (AlertSecs + 2)) { + sprintf(buff, "Read Waiting (%2d.%01d) for %d Bytes", + i / TICK, (i % TICK) * 10 / TICK, len); + SetConsoleTitle(buff); + } + if 
(win32_check_interrupt() || ((i / TICK) > lynx_timeout)) { + if (CloseHandle(hThread) == FALSE) { + HTInfoMsg("Thread terminate Failed"); + } + WSASetLastError(ETIMEDOUT); + ret_val = HT_INTERRUPTED; + break; + } + } else if (val == WAIT_OBJECT_0) { + if (GetExitCodeThread(hThread, &exitcode) == FALSE) { + exitcode = DWORD_ERR; + } + if (CloseHandle(hThread) == FALSE) { + HTInfoMsg("Thread terminate Failed"); + } + now_TickCount = GetTickCount(); + if (now_TickCount >= save_TickCount) + process_time = now_TickCount - save_TickCount; + else + process_time = now_TickCount + (0xffffffff - save_TickCount); + + if (process_time == 0) + process_time = 1; + g_total_times += process_time; + + /* + * DWORD is unsigned, and could be an error code which is signed. + */ + if ((long) exitcode > 0) + g_total_bytes += exitcode; + + ws_read_per_sec = g_total_bytes; + if (ws_read_per_sec > 2000000) { + if (g_total_times > 1000) + ws_read_per_sec /= (g_total_times / 1000); + } else { + ws_read_per_sec *= 1000; + ws_read_per_sec /= g_total_times; + } + + ret_val = exitcode; + break; + } + } /* end while(1) */ + + read_exit: + LeaveCriticalSection(&critSec_READ); + return ret_val; +} +#endif /* _WINDOWS */ + +/* + * Strip any username from the given string so we retain only the host. + */ +static void strip_userid(char *host) +{ + char *p1 = host; + char *p2 = strchr(host, '@'); + char *fake; + + if (p2 != 0) { + *p2++ = '\0'; + if ((fake = HTParse(host, "", PARSE_HOST)) != NULL) { + char *msg = NULL; + + CTRACE((tfp, "parsed:%s\n", fake)); + HTSprintf0(&msg, gettext("Address contains a username: %s"), host); + HTAlert(msg); + FREE(msg); + } + while ((*p1++ = *p2++) != '\0') { + ; + } + } +} + +/* + * Check if the user's options specified to use the given encoding. Normally + * all encodings with compiled-in support are specified (encodingALL). 
+ */ +static BOOL acceptEncoding(int code) +{ + BOOL result = FALSE; + + if ((code & LYAcceptEncoding) != 0) { + const char *program = 0; + + switch (code) { + case encodingGZIP: + program = HTGetProgramPath(ppGZIP); + break; + case encodingDEFLATE: + program = HTGetProgramPath(ppINFLATE); + break; + case encodingCOMPRESS: + program = HTGetProgramPath(ppCOMPRESS); + break; + case encodingBZIP2: + program = HTGetProgramPath(ppBZIP2); + break; + default: + break; + } + /* + * FIXME: if lynx did not rely upon external programs to decompress + * files for external viewers, this check could be relaxed. + */ + result = (BOOL) (program != 0); + } + return result; +} + +#ifdef USE_SSL +static void show_cert_issuer(X509 * peer_cert GCC_UNUSED) +{ +#if defined(USE_OPENSSL_INCL) || defined(USE_GNUTLS_FUNCS) + char ssl_dn[1024]; + char *msg = NULL; + + X509_NAME_oneline(X509_get_issuer_name(peer_cert), ssl_dn, (int) sizeof(ssl_dn)); + HTSprintf0(&msg, gettext("Certificate issued by: %s"), ssl_dn); + _HTProgress(msg); + FREE(msg); +#elif defined(USE_GNUTLS_INCL) + /* the OpenSSL "compat" code compiles but dumps core with GNU TLS */ +#endif +} +#endif + +/* + * Remove IPv6 brackets (and any port-number) from the given host-string. + */ +#ifdef USE_SSL +static char *StripIpv6Brackets(char *host) +{ + int port_number; + char *p; + + if ((p = HTParsePort(host, &port_number)) != 0) + *p = '\0'; + + if (*host == '[') { + p = host + strlen(host) - 1; + if (*p == ']') { + *p = '\0'; + ++host; + } + } + return host; +} +#endif + +/* Load Document from HTTP Server HTLoadHTTP() + * ============================== + * + * Given a hypertext address, this routine loads a document. + * + * + * On entry, + * arg is the hypertext reference of the article to be loaded. + * + * On exit, + * returns >=0 If no error, a good socket number + * <0 Error. + * + * The socket must be closed by the caller after the document has been + * read. 
+ * + */ +static int HTLoadHTTP(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +{ + static char empty[1]; + int s; /* Socket number for returned data */ + const char *url = arg; /* The URL which get_physical() returned */ + bstring *command = NULL; /* The whole command */ + char *eol; /* End of line if found */ + char *start_of_data; /* Start of body of reply */ + int status; /* tcp return */ + off_t bytes_already_read; + char crlf[3]; /* A CR LF equivalent string */ + HTStream *target; /* Unconverted data */ + HTFormat format_in; /* Format arriving in the message */ + BOOL do_head = FALSE; /* Whether or not we should do a head */ + BOOL do_post = FALSE; /* ARE WE posting ? */ + const char *METHOD; + + char *line_buffer; + char *line_kept_clean; + +#ifdef SH_EX /* FIX BUG by kaz@maczuka.hitachi.ibaraki.jp */ + int real_length_of_line = 0; +#endif + BOOL extensions; /* Assume good HTTP server */ + char *linebuf = NULL; + char temp[80]; + BOOL first_Accept = TRUE; + BOOL show_401 = FALSE; + BOOL show_407 = FALSE; + BOOL auth_proxy = NO; /* Generate a proxy authorization. - AJL */ + + int length, rawlength, rv; + int server_status = 0; + BOOL doing_redirect, already_retrying = FALSE; + int len = 0; + +#ifdef USE_SSL + unsigned long SSLerror; + BOOL do_connect = FALSE; /* ARE WE going to use a proxy tunnel ? */ + BOOL did_connect = FALSE; /* ARE WE actually using a proxy tunnel ? 
*/ + const char *connect_url = NULL; /* The URL being proxied */ + char *connect_host = NULL; /* The host being proxied */ + SSL *handle = NULL; /* The SSL handle */ + X509 *peer_cert; /* The peer certificate */ + char ssl_dn[1024]; + char *cert_host; + char *ssl_host; + char *p; + char *msg = NULL; + int status_sslcertcheck; + char *ssl_dn_start; + char *ssl_all_cns = NULL; + +#ifdef USE_GNUTLS_INCL + int ret; + unsigned tls_status; +#endif + +#if SSLEAY_VERSION_NUMBER >= 0x0900 + BOOL try_tls = TRUE; +#endif /* SSLEAY_VERSION_NUMBER >= 0x0900 */ + SSL_handle = NULL; +#else + void *handle = NULL; +#endif /* USE_SSL */ + + if (anAnchor->isHEAD) + do_head = TRUE; + else if (anAnchor->post_data) + do_post = TRUE; + + if (!url) { + status = -3; + _HTProgress(BAD_REQUEST); + goto done; + } + if (!*url) { + status = -2; + _HTProgress(BAD_REQUEST); + goto done; + } +#ifdef USE_SSL + if (using_proxy && !StrNCmp(url, "http://", 7)) { + int portnumber; + + if ((connect_url = strstr((url + 7), "https://"))) { + do_connect = TRUE; + connect_host = HTParse(connect_url, "https", PARSE_HOST); + if (!HTParsePort(connect_host, &portnumber)) { + sprintf(temp, ":%d", HTTPS_PORT); + StrAllocCat(connect_host, temp); + } + CTRACE((tfp, "HTTP: connect_url = '%s'\n", connect_url)); + CTRACE((tfp, "HTTP: connect_host = '%s'\n", connect_host)); + } else if ((connect_url = strstr((url + 7), "snews://"))) { + do_connect = TRUE; + connect_host = HTParse(connect_url, "snews", PARSE_HOST); + if (!HTParsePort(connect_host, &portnumber)) { + sprintf(temp, ":%d", SNEWS_PORT); + StrAllocCat(connect_host, temp); + } + CTRACE((tfp, "HTTP: connect_url = '%s'\n", connect_url)); + CTRACE((tfp, "HTTP: connect_host = '%s'\n", connect_host)); + } + } +#endif /* USE_SSL */ + + sprintf(crlf, "%c%c", CR, LF); + + /* + * At this point, we're talking HTTP/1.0. + */ + extensions = YES; + + try_again: + /* + * All initializations are moved down here from up above, so we can start + * over here... 
+ */ + eol = 0; + length = 0; + doing_redirect = FALSE; + permanent_redirection = FALSE; + redirect_post_content = FALSE; + target = NULL; + line_buffer = NULL; + line_kept_clean = NULL; + +#ifdef USE_SSL + if (!StrNCmp(url, "https", 5)) + status = HTDoConnect(url, "HTTPS", HTTPS_PORT, &s); + else + status = HTDoConnect(url, "HTTP", HTTP_PORT, &s); +#else + if (!StrNCmp(url, "https", 5)) { + HTAlert(gettext("This client does not contain support for HTTPS URLs.")); + status = HT_NOT_LOADED; + goto done; + } + status = HTDoConnect(arg, "HTTP", HTTP_PORT, &s); +#endif /* USE_SSL */ + if (status == HT_INTERRUPTED) { + /* + * Interrupt cleanly. + */ + CTRACE((tfp, "HTTP: Interrupted on connect; recovering cleanly.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + status = HT_NOT_LOADED; + goto done; + } + if (status < 0) { +#ifdef _WINDOWS + CTRACE((tfp, "HTTP: Unable to connect to remote host for `%s'\n" + " (status = %d, sock_errno = %d).\n", + url, status, SOCKET_ERRNO)); +#else + CTRACE((tfp, + "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", + url, SOCKET_ERRNO)); +#endif + HTAlert(gettext("Unable to connect to remote host.")); + status = HT_NOT_LOADED; + goto done; + } +#ifdef USE_SSL + use_tunnel: + /* + * If this is an https document, then do the SSL stuff here. 
+ */ + if (did_connect || !StrNCmp(url, "https", 5)) { + SSL_handle = handle = HTGetSSLHandle(); + SSL_set_fd(handle, s); + /* get host we're connecting to */ + ssl_host = HTParse(url, "", PARSE_HOST); + ssl_host = StripIpv6Brackets(ssl_host); +#if SSLEAY_VERSION_NUMBER >= 0x0900 +#ifndef USE_NSS_COMPAT_INCL + if (!try_tls) { + handle->options |= SSL_OP_NO_TLSv1; +#if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT) + } else { + SSL_set_tlsext_host_name(handle, ssl_host); +#endif + } +#endif +#endif /* SSLEAY_VERSION_NUMBER >= 0x0900 */ + HTSSLInitPRNG(); + status = SSL_connect(handle); + + if (status <= 0) { +#if SSLEAY_VERSION_NUMBER >= 0x0900 + if (try_tls) { + _HTProgress(gettext("Retrying connection without TLS.")); + try_tls = FALSE; + if (did_connect) + HTTP_NETCLOSE(s, handle); + goto try_again; + } else { + CTRACE((tfp, + "HTTP: Unable to complete SSL handshake for '%s', SSL_connect=%d, SSL error stack dump follows\n", + url, status)); + SSL_load_error_strings(); + while ((SSLerror = ERR_get_error()) != 0) { + CTRACE((tfp, "HTTP: SSL: %s\n", ERR_error_string(SSLerror, NULL))); + } + HTAlert("Unable to make secure connection to remote host."); + if (did_connect) + HTTP_NETCLOSE(s, handle); + status = HT_NOT_LOADED; + goto done; + } +#else + unsigned long SSLerror; + + CTRACE((tfp, + "HTTP: Unable to complete SSL handshake for '%s', SSL_connect=%d, SSL error stack dump follows\n", + url, status)); + SSL_load_error_strings(); + while ((SSLerror = ERR_get_error()) != 0) { + CTRACE((tfp, "HTTP: SSL: %s\n", ERR_error_string(SSLerror, NULL))); + } + HTAlert("Unable to make secure connection to remote host."); + if (did_connect) + HTTP_NETCLOSE(s, handle); + status = HT_NOT_LOADED; + goto done; +#endif /* SSLEAY_VERSION_NUMBER >= 0x0900 */ + } +#ifdef USE_GNUTLS_INCL + ret = gnutls_certificate_verify_peers2(handle->gnutls_state, &tls_status); + if (ret < 0) { + int flag_continue = 1; + char *msg2; + + if (tls_status & 
GNUTLS_CERT_SIGNER_NOT_FOUND) { + msg2 = gettext("no issuer was found"); + } else if (tls_status & GNUTLS_CERT_SIGNER_NOT_CA) { + msg2 = gettext("issuer is not a CA"); + } else if (tls_status & GNUTLS_CERT_SIGNER_NOT_FOUND) { + msg2 = gettext("the certificate has no known issuer"); + } else if (tls_status & GNUTLS_CERT_REVOKED) { + msg2 = gettext("the certificate has been revoked"); + } else { + msg2 = gettext("the certificate is not trusted"); + } + HTSprintf0(&msg, SSL_FORCED_PROMPT, msg2); + CTRACE((tfp, "HTLoadHTTP: %s\n", msg)); + if (!ssl_noprompt) { + if (!HTForcedPrompt(ssl_noprompt, msg, YES)) { + flag_continue = 0; + } + } else if (ssl_noprompt == FORCE_PROMPT_NO) { + flag_continue = 0; + } + FREE(msg); + if (flag_continue == 0) { + status = HT_NOT_LOADED; + FREE(msg); + goto done; + } + } +#endif + + peer_cert = SSL_get_peer_certificate(handle); +#if defined(USE_OPENSSL_INCL) || defined(USE_GNUTLS_FUNCS) + X509_NAME_oneline(X509_get_subject_name(peer_cert), + ssl_dn, (int) sizeof(ssl_dn)); +#elif defined(USE_GNUTLS_INCL) + X509_NAME_oneline(X509_get_subject_name(peer_cert), + ssl_dn + 1, (int) sizeof(ssl_dn) - 1); + + /* Iterate over DN in incompatible GnuTLS format to bring it into OpenSSL format */ + ssl_dn[0] = '/'; + ssl_dn_start = ssl_dn; + while (*ssl_dn_start) { + if ((*ssl_dn_start == ',') && (*(ssl_dn_start + 1) == ' ')) { + *ssl_dn_start++ = '/'; + if (*(p = ssl_dn_start) != 0) { + while ((p[0] = p[1]) != 0) + ++p; + } + } else { + ssl_dn_start++; + } + } +#endif + + /* + * X.509 DN validation taking ALL CN fields into account + * (c) 2006 Thorsten Glaser <tg@mirbsd.de> + */ + + /* initialise status information */ + status_sslcertcheck = 0; /* 0 = no CN found in DN */ + ssl_dn_start = ssl_dn; + + /* validate all CNs found in DN */ + CTRACE((tfp, "Validating CNs in '%s'\n", ssl_dn_start)); + while ((cert_host = strstr(ssl_dn_start, "/CN=")) != NULL) { + status_sslcertcheck = 1; /* 1 = could not verify CN */ + /* start of CommonName */ + 
cert_host += 4; + /* find next part of DistinguishedName */ + if ((p = strchr(cert_host, '/')) != NULL) { + *p = '\0'; + ssl_dn_start = p; /* yes this points to the NUL byte */ + } else + ssl_dn_start = NULL; + cert_host = StripIpv6Brackets(cert_host); + + /* verify this CN */ + CTRACE((tfp, "Matching\n\tssl_host '%s'\n\tcert_host '%s'\n", + ssl_host, cert_host)); + if (!strcasecomp_asterisk(ssl_host, cert_host)) { + status_sslcertcheck = 2; /* 2 = verified peer */ + /* I think this is cool to have in the logs -TG */ + HTSprintf0(&msg, + gettext("Verified connection to %s (cert=%s)"), + ssl_host, cert_host); + _HTProgress(msg); + FREE(msg); + /* no need to continue the verification loop */ + break; + } + + /* add this CN to list of failed CNs */ + if (ssl_all_cns == NULL) + StrAllocCopy(ssl_all_cns, "CN<"); + else + StrAllocCat(ssl_all_cns, ":CN<"); + StrAllocCat(ssl_all_cns, cert_host); + StrAllocCat(ssl_all_cns, ">"); + /* if we cannot retry, don't try it */ + if (ssl_dn_start == NULL) + break; + /* now retry next CN found in DN */ + *ssl_dn_start = '/'; /* formerly NUL byte */ + } + + /* check the X.509v3 Subject Alternative Name */ +#ifdef USE_GNUTLS_INCL + if (status_sslcertcheck < 2) { + int i; + size_t size; + gnutls_x509_crt cert; + static char buf[2048]; + + /* import the certificate to the x509_crt format */ + if (gnutls_x509_crt_init(&cert) == 0) { + + if (gnutls_x509_crt_import(cert, peer_cert, + GNUTLS_X509_FMT_DER) < 0) { + gnutls_x509_crt_deinit(cert); + goto done; + } + + ret = 0; + for (i = 0; !(ret < 0); i++) { + size = sizeof(buf); + ret = gnutls_x509_crt_get_subject_alt_name(cert, i, buf, + &size, NULL); + + if (strcasecomp_asterisk(ssl_host, buf) == 0) { + status_sslcertcheck = 2; + HTSprintf0(&msg, + gettext("Verified connection to %s (subj=%s)"), + ssl_host, buf); + _HTProgress(msg); + FREE(msg); + break; + } + + } + } + } +#endif +#ifdef USE_OPENSSL_INCL + if (status_sslcertcheck < 2) { + STACK_OF(GENERAL_NAME) * gens; + int i, numalts; + 
const GENERAL_NAME *gn; + + gens = (STACK_OF(GENERAL_NAME) *) + X509_get_ext_d2i(peer_cert, NID_subject_alt_name, NULL, NULL); + + if (gens != NULL) { + numalts = sk_GENERAL_NAME_num(gens); + for (i = 0; i < numalts; ++i) { + gn = sk_GENERAL_NAME_value(gens, i); + if (gn->type == GEN_DNS) + cert_host = (char *) ASN1_STRING_data(gn->d.ia5); + else if (gn->type == GEN_IPADD) { + /* XXX untested -TG */ + size_t j = (size_t) ASN1_STRING_length(gn->d.ia5); + + cert_host = (char *) malloc(j + 1); + MemCpy(cert_host, ASN1_STRING_data(gn->d.ia5), j); + cert_host[j] = '\0'; + } else + continue; + status_sslcertcheck = 1; /* got at least one */ + /* verify this SubjectAltName (see above) */ + cert_host = StripIpv6Brackets(cert_host); + if (!(gn->type == GEN_IPADD ? strcasecomp : + strcasecomp_asterisk) (ssl_host, cert_host)) { + status_sslcertcheck = 2; + HTSprintf0(&msg, + gettext("Verified connection to %s (subj=%s)"), + ssl_host, cert_host); + _HTProgress(msg); + FREE(msg); + if (gn->type == GEN_IPADD) + free(cert_host); + break; + } + /* add to list of failed CNs */ + if (ssl_all_cns == NULL) + StrAllocCopy(ssl_all_cns, "SAN<"); + else + StrAllocCat(ssl_all_cns, ":SAN<"); + if (gn->type == GEN_DNS) + StrAllocCat(ssl_all_cns, "DNS="); + else if (gn->type == GEN_IPADD) + StrAllocCat(ssl_all_cns, "IP="); + StrAllocCat(ssl_all_cns, cert_host); + StrAllocCat(ssl_all_cns, ">"); + if (gn->type == GEN_IPADD) + free(cert_host); + } + sk_GENERAL_NAME_free(gens); + } + } +#endif /* USE_OPENSSL_INCL */ + + /* if an error occurred, format the appropriate message */ + if (status_sslcertcheck == 0) { + HTSprintf0(&msg, SSL_FORCED_PROMPT, + gettext("Can't find common name in certificate")); + } else if (status_sslcertcheck == 1) { + HTSprintf0(&msg, + gettext("SSL error:host(%s)!=cert(%s)-Continue?"), + ssl_host, ssl_all_cns); + } + + /* if an error occurred, let the user decide how much he trusts */ + if (status_sslcertcheck < 2) { + if (!HTForcedPrompt(ssl_noprompt, msg, YES)) { + 
status = HT_NOT_LOADED; + FREE(msg); + FREE(ssl_all_cns); + goto done; + } + HTSprintf0(&msg, + gettext("UNVERIFIED connection to %s (cert=%s)"), + ssl_host, ssl_all_cns ? ssl_all_cns : "NONE"); + _HTProgress(msg); + FREE(msg); + } + + show_cert_issuer(peer_cert); + + HTSprintf0(&msg, + gettext("Secure %d-bit %s (%s) HTTP connection"), + SSL_get_cipher_bits(handle, NULL), + SSL_get_cipher_version(handle), + SSL_get_cipher(handle)); + _HTProgress(msg); + FREE(msg); + } +#endif /* USE_SSL */ + + /* Ask that node for the document, omitting the host name & anchor + */ + { + char *p1 = (HTParse(url, "", PARSE_PATH | PARSE_PUNCTUATION)); + +#ifdef USE_SSL + if (do_connect) { + METHOD = "CONNECT"; + BStrCopy0(command, "CONNECT "); + } else +#endif /* USE_SSL */ + if (do_post) { + METHOD = "POST"; + BStrCopy0(command, "POST "); + } else if (do_head) { + METHOD = "HEAD"; + BStrCopy0(command, "HEAD "); + } else { + METHOD = "GET"; + BStrCopy0(command, "GET "); + } + + /* + * If we are using a proxy gateway don't copy in the first slash of + * say: /gopher://a;lkdjfl;ajdf;lkj/;aldk/adflj so that just + * gopher://.... is sent. 
+ */ +#ifdef USE_SSL + if (using_proxy && !did_connect) { + if (do_connect) + BStrCat0(command, connect_host); + else + BStrCat0(command, p1 + 1); + } +#else + if (using_proxy) + BStrCat0(command, p1 + 1); +#endif /* USE_SSL */ + else + BStrCat0(command, p1); + FREE(p1); + } + if (extensions) { + BStrCat0(command, " "); + BStrCat0(command, HTTP_VERSION); + } + + BStrCat0(command, crlf); /* CR LF, as in rfc 977 */ + + if (extensions) { + int n, i; + char *host = NULL; + + if ((host = HTParse(anAnchor->address, "", PARSE_HOST)) != NULL) { + strip_userid(host); + HTBprintf(&command, "Host: %s%c%c", host, CR, LF); + FREE(host); + } + + if (!HTPresentations) + HTFormatInit(); + n = HTList_count(HTPresentations); + + first_Accept = TRUE; + len = 0; + for (i = 0; i < n; i++) { + HTPresentation *pres = + (HTPresentation *) HTList_objectAt(HTPresentations, i); + + if (pres->get_accept) { + if (pres->quality < 1.0) { + if (pres->maxbytes > 0) { + sprintf(temp, ";q=%4.3f;mxb=%" PRI_off_t "", + pres->quality, CAST_off_t (pres->maxbytes)); + } else { + sprintf(temp, ";q=%4.3f", pres->quality); + } + } else if (pres->maxbytes > 0) { + sprintf(temp, ";mxb=%" PRI_off_t "", CAST_off_t (pres->maxbytes)); + } else { + temp[0] = '\0'; + } + HTSprintf0(&linebuf, "%s%s%s", + (first_Accept ? + "Accept: " : ", "), + HTAtom_name(pres->rep), + temp); + len += (int) strlen(linebuf); + if (len > 252 && !first_Accept) { + BStrCat0(command, crlf); + HTSprintf0(&linebuf, "Accept: %s%s", + HTAtom_name(pres->rep), + temp); + len = (int) strlen(linebuf); + } + BStrCat0(command, linebuf); + first_Accept = FALSE; + } + } + HTBprintf(&command, "%s*/*;q=0.01%c%c", + (first_Accept ? + "Accept: " : ", "), CR, LF); + + /* + * FIXME: suppressing the "Accept-Encoding" in this case is done to + * work around limitations of the presentation logic used for the + * command-line "-base" option. 
The remote site may transmit the + * document gzip'd, but the ensuing logic in HTSaveToFile() would see + * the mime-type as gzip rather than text/html, and not prepend the + * base URL. This is less efficient than accepting the compressed data + * and uncompressing it, adding the base URL but is simpler than + * augmenting the dump's presentation logic -TD + */ + if (LYPrependBaseToSource && dump_output_immediately) { + CTRACE((tfp, + "omit Accept-Encoding to work-around interaction with -source\n")); + } else { + char *list = 0; + int j, k; + + for (j = 1; j < encodingALL; j <<= 1) { + if (acceptEncoding(j)) { + for (k = 0; tbl_preferred_encoding[k].name != 0; ++k) { + if (tbl_preferred_encoding[k].value == j) { + if (list != 0) + StrAllocCat(list, ", "); + StrAllocCat(list, tbl_preferred_encoding[k].name); + break; + } + } + } + } + + if (list != 0) { + HTBprintf(&command, "Accept-Encoding: %s%c%c", list, CR, LF); + free(list); + } + } + + if (language && *language) { + HTBprintf(&command, "Accept-Language: %s%c%c", language, CR, LF); + } + + if (pref_charset && *pref_charset) { + BStrCat0(command, "Accept-Charset: "); + StrAllocCopy(linebuf, pref_charset); + if (linebuf[strlen(linebuf) - 1] == ',') + linebuf[strlen(linebuf) - 1] = '\0'; + LYLowerCase(linebuf); + if (strstr(linebuf, "iso-8859-1") == NULL) + StrAllocCat(linebuf, ", iso-8859-1;q=0.01"); + if (strstr(linebuf, "us-ascii") == NULL) + StrAllocCat(linebuf, ", us-ascii;q=0.01"); + BStrCat0(command, linebuf); + HTBprintf(&command, "%c%c", CR, LF); + } +#if 0 + /* + * Promote 300 (Multiple Choices) replies, if supported, over 406 (Not + * Acceptable) replies. - FM + * + * This used to be done in versions 2.7 and 2.8*, but violates the + * specs for transparent content negotiation and has the effect that + * servers supporting those specs will send 300 (Multiple Choices) + * instead of a normal response (e.g. 200 OK), since they will assume + * that the client wants to make the choice. 
It is not clear whether + * there are any servers or sites for which sending this header really + * improves anything. + * + * If there ever is a need to send "Negotiate: trans" and really mean + * it, we should send "Negotiate: trans,trans" or similar, since that + * is semantically equivalent and some servers may ignore "Negotiate: + * trans" as a special case when it comes from Lynx (to work around the + * old faulty behavior). - kw + * + * References: + * RFC 2295 (see also RFC 2296), and mail to lynx-dev and + * new-httpd@apache.org from Koen Holtman, Jan 1999. + */ + if (!do_post) { + HTBprintf(&command, "Negotiate: trans%c%c", CR, LF); + } +#endif /* 0 */ + + /* + * When reloading give no-cache pragma to proxy server to make it + * refresh its cache. -- Ari L. <luotonen@dxcern.cern.ch> + * + * Also send it as a Cache-Control header for HTTP/1.1. - FM + */ + if (reloading) { + HTBprintf(&command, "Pragma: no-cache%c%c", CR, LF); + HTBprintf(&command, "Cache-Control: no-cache%c%c", CR, LF); + } + + if (LYSendUserAgent || no_useragent) { + if (non_empty(LYUserAgent)) { + char *cp = LYSkipBlanks(LYUserAgent); + + /* Won't send it at all if all blank - kw */ + if (*cp != '\0') + HTBprintf(&command, "User-Agent: %.*s%c%c", + INIT_LINE_SIZE - 15, LYUserAgent, CR, LF); + } else { + HTBprintf(&command, "User-Agent: %s/%s libwww-FM/%s%c%c", + HTAppName ? HTAppName : "unknown", + HTAppVersion ? HTAppVersion : "0.0", + HTLibraryVersion, CR, LF); + } + } + + if (personal_mail_address && !LYNoFromHeader) { + HTBprintf(&command, "From: %s%c%c", personal_mail_address, CR, LF); + } + + if (!(LYUserSpecifiedURL || + LYNoRefererHeader || LYNoRefererForThis) && + strcmp(HTLoadedDocumentURL(), "")) { + const char *cp = LYRequestReferer; + + if (!cp) + cp = HTLoadedDocumentURL(); /* @@@ Try both? - kw */ + BStrCat0(command, "Referer: "); + if (isLYNXIMGMAP(cp)) { + char *pound = findPoundSelector(cp); + int nn = (pound ? 
(int) (pound - cp) : (int) strlen(cp)); + + HTSABCat(&command, cp + LEN_LYNXIMGMAP, nn); + } else { + BStrCat0(command, cp); + } + HTBprintf(&command, "%c%c", CR, LF); + } { + char *abspath; + char *docname; + char *hostname; + char *colon; + int portnumber; + char *auth, *cookie = NULL; + BOOL secure = (BOOL) (StrNCmp(anAnchor->address, "https", 5) + ? FALSE + : TRUE); + + abspath = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION); + docname = HTParse(arg, "", PARSE_PATH); + hostname = HTParse(arg, "", PARSE_HOST); + if (hostname && + NULL != (colon = HTParsePort(hostname, &portnumber))) { + *colon = '\0'; /* Chop off port number */ + } else if (!StrNCmp(arg, "https", 5)) { + portnumber = HTTPS_PORT; + } else { + portnumber = HTTP_PORT; + } + + /* + * Add Authorization, Proxy-Authorization, and/or Cookie headers, + * if applicable. + */ + if (using_proxy) { + /* + * If we are using a proxy, first determine if we should + * include an Authorization header and/or Cookie header for the + * ultimate target of this request. - FM & AJL + */ + char *host2 = NULL, *path2 = NULL; + int port2 = (StrNCmp(docname, "https", 5) ? + HTTP_PORT : HTTPS_PORT); + + host2 = HTParse(docname, "", PARSE_HOST); + path2 = HTParse(docname, "", PARSE_PATH | PARSE_PUNCTUATION); + if (host2) { + if ((colon = HTParsePort(host2, &port2)) != NULL) { + /* Use non-default port number */ + *colon = '\0'; + } + } + /* + * This composeAuth() does file access, i.e., for the ultimate + * target of the request. - AJL + */ + auth_proxy = NO; + if ((auth = HTAA_composeAuth(host2, port2, path2, + auth_proxy)) != NULL && + *auth != '\0') { + /* + * If auth is not NULL nor zero-length, it's an + * Authorization header to be included. - FM + */ + HTBprintf(&command, "%s%c%c", auth, CR, LF); + CTRACE((tfp, "HTTP: Sending authorization: %s\n", auth)); + } else if (auth && *auth == '\0') { + /* + * If auth is a zero-length string, the user either + * cancelled or goofed at the username and password prompt. 
+ * - FM + */ + if (!(traversal || dump_output_immediately) && + HTConfirm(CONFIRM_WO_PASSWORD)) { + show_401 = TRUE; + } else { + if (traversal || dump_output_immediately) + HTAlert(FAILED_NEED_PASSWD); +#ifdef USE_SSL + if (did_connect) + HTTP_NETCLOSE(s, handle); +#endif /* USE_SSL */ + BStrFree(command); + FREE(hostname); + FREE(docname); + FREE(abspath); + FREE(host2); + FREE(path2); + status = HT_NOT_LOADED; + goto done; + } + } else { + CTRACE((tfp, "HTTP: Not sending authorization (yet).\n")); + } + /* + * Add 'Cookie:' header, if it's HTTP or HTTPS document being + * proxied. + */ + if (!StrNCmp(docname, "http", 4)) { + cookie = LYAddCookieHeader(host2, path2, port2, secure); + } + FREE(host2); + FREE(path2); + /* + * The next composeAuth() will be for the proxy. - AJL + */ + auth_proxy = YES; + } else { + /* + * Add cookie for a non-proxied request. - FM + */ + cookie = LYAddCookieHeader(hostname, abspath, portnumber, secure); + auth_proxy = NO; + } + /* + * If we do have a cookie set, add it to the request buffer. - FM + */ + if (cookie != NULL) { + if (*cookie != '$') { + /* + * It's a historical cookie, so signal to the server that + * we support modern cookies. - FM + */ + BStrCat0(command, "Cookie2: $Version=\"1\""); + BStrCat0(command, crlf); + CTRACE((tfp, "HTTP: Sending Cookie2: $Version =\"1\"\n")); + } + if (*cookie != '\0') { + /* + * It's not a zero-length string, so add the header. Note + * that any folding of long strings has been done already + * in LYCookie.c. - FM + */ + BStrCat0(command, "Cookie: "); + BStrCat0(command, cookie); + BStrCat0(command, crlf); + CTRACE((tfp, "HTTP: Sending Cookie: %s\n", cookie)); + } + FREE(cookie); + } + FREE(abspath); + + /* + * If we are using a proxy, auth_proxy should be YES, and we check + * here whether we want a Proxy-Authorization header for it. If we + * are not using a proxy, auth_proxy should still be NO, and we + * check here for whether we want an Authorization header. 
- FM & + * AJL + */ + if ((auth = HTAA_composeAuth(hostname, + portnumber, + docname, + auth_proxy)) != NULL && + *auth != '\0') { + /* + * If auth is not NULL nor zero-length, it's an Authorization + * or Proxy-Authorization header to be included. - FM + */ + HTBprintf(&command, "%s%c%c", auth, CR, LF); + CTRACE((tfp, (auth_proxy ? + "HTTP: Sending proxy authorization: %s\n" : + "HTTP: Sending authorization: %s\n"), + auth)); + } else if (auth && *auth == '\0') { + /* + * If auth is a zero-length string, the user either cancelled + * or goofed at the username and password prompt. - FM + */ + if (!(traversal || dump_output_immediately) && HTConfirm(CONFIRM_WO_PASSWORD)) { + if (auth_proxy == TRUE) { + show_407 = TRUE; + } else { + show_401 = TRUE; + } + } else { + if (traversal || dump_output_immediately) + HTAlert(FAILED_NEED_PASSWD); + BStrFree(command); + FREE(hostname); + FREE(docname); + status = HT_NOT_LOADED; + goto done; + } + } else { + CTRACE((tfp, (auth_proxy ? + "HTTP: Not sending proxy authorization (yet).\n" : + "HTTP: Not sending authorization (yet).\n"))); + } + FREE(hostname); + FREE(docname); + } + } + + if ( +#ifdef USE_SSL + !do_connect && +#endif /* USE_SSL */ + do_post) { + CTRACE((tfp, "HTTP: Doing post, content-type '%s'\n", + anAnchor->post_content_type + ? anAnchor->post_content_type + : "lose")); + HTBprintf(&command, "Content-type: %s%c%c", + anAnchor->post_content_type + ? anAnchor->post_content_type + : "lose", + CR, LF); + + HTBprintf(&command, "Content-length: %d%c%c", + !isBEmpty(anAnchor->post_data) + ? BStrLen(anAnchor->post_data) + : 0, + CR, LF); + + BStrCat0(command, crlf); /* Blank line means "end" of headers */ + + BStrCat(command, anAnchor->post_data); + } else + BStrCat0(command, crlf); /* Blank line means "end" of headers */ + + if (TRACE) { + CTRACE((tfp, "Writing:\n")); + trace_bstring(command); +#ifdef USE_SSL + CTRACE((tfp, "%s", + (anAnchor->post_data && !do_connect ? 
crlf : ""))); +#else + CTRACE((tfp, "%s", + (anAnchor->post_data ? crlf : ""))); +#endif /* USE_SSL */ + CTRACE((tfp, "----------------------------------\n")); + } + + _HTProgress(gettext("Sending HTTP request.")); + +#ifdef NOT_ASCII /* S/390 -- gil -- 0548 */ + { + char *p2; + + for (p2 = BStrData(command); + p2 < BStrData(command) + BStrLen(command); + p2++) + *p2 = TOASCII(*p2); + } +#endif /* NOT_ASCII */ + status = (int) HTTP_NETWRITE(s, + BStrData(command), + BStrLen(command), + handle); + BStrFree(command); + FREE(linebuf); + if (status <= 0) { + if (status == 0) { + CTRACE((tfp, "HTTP: Got status 0 in initial write\n")); + /* Do nothing. */ + } else if ((SOCKET_ERRNO == ENOTCONN || + SOCKET_ERRNO == ECONNRESET || + SOCKET_ERRNO == EPIPE) && + !already_retrying && + /* Don't retry if we're posting. */ !do_post) { + /* + * Arrrrgh, HTTP 0/1 compatibility problem, maybe. + */ + CTRACE((tfp, + "HTTP: BONZO ON WRITE Trying again with HTTP0 request.\n")); + _HTProgress(RETRYING_AS_HTTP0); + HTTP_NETCLOSE(s, handle); + extensions = NO; + already_retrying = TRUE; + goto try_again; + } else { + CTRACE((tfp, + "HTTP: Hit unexpected network WRITE error; aborting connection.\n")); + HTTP_NETCLOSE(s, handle); + status = -1; + HTAlert(gettext("Unexpected network write error; connection aborted.")); + goto done; + } + } + + CTRACE((tfp, "HTTP: WRITE delivered OK\n")); + _HTProgress(gettext("HTTP request sent; waiting for response.")); + + /* Read the first line of the response + * ----------------------------------- + */ + { + /* Get numeric status etc */ + BOOL end_of_file = NO; + int buffer_length = INIT_LINE_SIZE; + + line_buffer = typecallocn(char, (size_t) buffer_length); + + if (line_buffer == NULL) + outofmem(__FILE__, "HTLoadHTTP"); + + HTReadProgress(bytes_already_read = 0, (off_t) 0); + do { /* Loop to read in the first line */ + /* + * Extend line buffer if necessary for those crazy WAIS URLs ;-) + */ + if (buffer_length - length < LINE_EXTEND_THRESH) { + 
buffer_length = buffer_length + buffer_length; + line_buffer = + (char *) realloc(line_buffer, ((unsigned) buffer_length * + sizeof(char))); + + if (line_buffer == NULL) + outofmem(__FILE__, "HTLoadHTTP"); + } + CTRACE((tfp, "HTTP: Trying to read %d\n", buffer_length - length - 1)); + status = HTTP_NETREAD(s, + line_buffer + length, + (buffer_length - length - 1), + handle); + CTRACE((tfp, "HTTP: Read %d\n", status)); + if (status <= 0) { + /* + * Retry if we get nothing back too. + * Bomb out if we get nothing twice. + */ + if (status == HT_INTERRUPTED) { + CTRACE((tfp, "HTTP: Interrupted initial read.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + HTTP_NETCLOSE(s, handle); + status = HT_NO_DATA; + goto clean_up; + } else if (status < 0 && + (SOCKET_ERRNO == ENOTCONN || +#ifdef _WINDOWS /* 1997/11/09 (Sun) 16:59:58 */ + SOCKET_ERRNO == ETIMEDOUT || +#endif + SOCKET_ERRNO == ECONNRESET || + SOCKET_ERRNO == EPIPE) && + !already_retrying && !do_post) { + /* + * Arrrrgh, HTTP 0/1 compability problem, maybe. 
+ */ + CTRACE((tfp, + "HTTP: BONZO Trying again with HTTP0 request.\n")); + HTTP_NETCLOSE(s, handle); + FREE(line_buffer); + FREE(line_kept_clean); + + extensions = NO; + already_retrying = TRUE; + _HTProgress(RETRYING_AS_HTTP0); + goto try_again; + } +#ifdef USE_SSL + else if ((SSLerror = ERR_get_error()) != 0) { + CTRACE((tfp, + "HTTP: Hit unexpected network read error; aborting connection; status %d:%s.\n", + status, ERR_error_string(SSLerror, NULL))); + HTAlert(gettext("Unexpected network read error; connection aborted.")); + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } +#endif + else { + CTRACE((tfp, + "HTTP: Hit unexpected network read error; aborting connection; status %d.\n", + status)); + HTAlert(gettext("Unexpected network read error; connection aborted.")); + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + } +#ifdef NOT_ASCII /* S/390 -- gil -- 0564 */ + { + char *p2; + + for (p2 = line_buffer + length; + p2 < line_buffer + length + status; + p2++) + *p2 = FROMASCII(*p2); + } +#endif /* NOT_ASCII */ + + bytes_already_read += status; + HTReadProgress(bytes_already_read, (off_t) 0); + +#ifdef UCX /* UCX returns -1 on EOF */ + if (status == 0 || status == -1) +#else + if (status == 0) +#endif + { + break; + } + line_buffer[length + status] = 0; + + if (line_buffer) { + FREE(line_kept_clean); + line_kept_clean = (char *) malloc((unsigned) buffer_length * + sizeof(char)); + + if (line_kept_clean == NULL) + outofmem(__FILE__, "HTLoadHTTP"); + MemCpy(line_kept_clean, line_buffer, buffer_length); +#ifdef SH_EX /* FIX BUG by kaz@maczuka.hitachi.ibaraki.jp */ + real_length_of_line = length + status; +#endif + } + + eol = strchr(line_buffer + length, LF); + /* Do we *really* want to do this? */ + if (eol && eol != line_buffer && *(eol - 1) == CR) + *(eol - 1) = ' '; + + length = length + status; + + /* Do we really want to do *this*? 
*/ + if (eol) + *eol = 0; /* Terminate the line */ + } + /* All we need is the first line of the response. If it's a HTTP/1.0 + * response, then the first line will be absurdly short and therefore + * we can safely gate the number of bytes read through this code (as + * opposed to below) to ~1000. + * + * Well, let's try 100. + */ + while (!eol && !end_of_file && bytes_already_read < 100); + } /* Scope of loop variables */ + + /* save total length, in case we decide later to show it all - kw */ + rawlength = length; + + /* We now have a terminated unfolded line. Parse it. + * -------------------------------------------------- + */ + CTRACE((tfp, "HTTP: Rx: %s\n", line_buffer)); + + /* + * Kludge to work with old buggy servers and the VMS Help gateway. They + * can't handle the third word, so we try again without it. + */ + if (extensions && /* Old buggy server or Help gateway? */ + (0 == StrNCmp(line_buffer, "<TITLE>Bad File Request</TITLE>", 31) || + 0 == StrNCmp(line_buffer, "Address should begin with", 25) || + 0 == StrNCmp(line_buffer, "<TITLE>Help ", 12) || + 0 == strcmp(line_buffer, + "Document address invalid or access not authorised"))) { + FREE(line_buffer); + FREE(line_kept_clean); + extensions = NO; + already_retrying = TRUE; + CTRACE((tfp, "HTTP: close socket %d to retry with HTTP0\n", s)); + HTTP_NETCLOSE(s, handle); + /* print a progress message */ + _HTProgress(RETRYING_AS_HTTP0); + goto try_again; + } { + int fields; + char server_version[VERSION_LENGTH + 1]; + + server_version[0] = 0; + + fields = sscanf(line_buffer, "%20s %d", + server_version, + &server_status); + + CTRACE((tfp, "HTTP: Scanned %d fields from line_buffer\n", fields)); + + if (http_error_file) { /* Make the status code externally available */ + FILE *error_file; + +#ifdef SERVER_STATUS_ONLY + error_file = fopen(http_error_file, TXT_W); + if (error_file) { /* Managed to open the file */ + fprintf(error_file, "error=%d\n", server_status); + fclose(error_file); + } +#else + error_file 
= fopen(http_error_file, TXT_A); + if (error_file) { /* Managed to open the file */ + fprintf(error_file, " URL=%s (%s)\n", url, METHOD); + fprintf(error_file, "STATUS=%s\n", line_buffer); + fclose(error_file); + } +#endif /* SERVER_STATUS_ONLY */ + } + + /* + * Rule out a non-HTTP/1.n reply as best we can. + */ + if (fields < 2 || !server_version[0] || server_version[0] != 'H' || + server_version[1] != 'T' || server_version[2] != 'T' || + server_version[3] != 'P' || server_version[4] != '/' || + server_version[6] != '.') { + /* + * Ugh! An HTTP0 reply, + */ + HTAtom *encoding; + + CTRACE((tfp, "--- Talking HTTP0.\n")); + + format_in = HTFileFormat(url, &encoding, NULL); + /* + * Treat all plain text as HTML. This sucks but its the only + * solution without without looking at content. + */ + if (!StrNCmp(HTAtom_name(format_in), "text/plain", 10)) { + CTRACE((tfp, "HTTP: format_in being changed to text/HTML\n")); + format_in = WWW_HTML; + } + if (!IsUnityEnc(encoding)) { + /* + * Change the format to that for "www/compressed". + */ + CTRACE((tfp, "HTTP: format_in is '%s',\n", HTAtom_name(format_in))); + StrAllocCopy(anAnchor->content_type, HTAtom_name(format_in)); + StrAllocCopy(anAnchor->content_encoding, HTAtom_name(encoding)); + format_in = HTAtom_for("www/compressed"); + CTRACE((tfp, " Treating as '%s' with encoding '%s'\n", + "www/compressed", HTAtom_name(encoding))); + } + + start_of_data = line_kept_clean; + } else { + /* + * Set up to decode full HTTP/1.n response. - FM + */ + format_in = HTAtom_for("www/mime"); + CTRACE((tfp, "--- Talking HTTP1.\n")); + + /* + * We set start_of_data to "" when !eol here because there will be + * a put_block done below; we do *not* use the value of + * start_of_data (as a pointer) in the computation of length (or + * anything else) when !eol. Otherwise, set the value of length to + * what we have beyond eol (i.e., beyond the status line). 
- FM + */ + if (eol != 0) { + start_of_data = (eol + 1); + } else { + start_of_data = empty; + } + length = (eol + ? length - (int) (start_of_data - line_buffer) + : 0); + + /* + * Trim trailing spaces in line_buffer so that we can use it in + * messages which include the status line. - FM + */ + while (line_buffer[strlen(line_buffer) - 1] == ' ') + line_buffer[strlen(line_buffer) - 1] = '\0'; + + /* + * Take appropriate actions based on the status. - FM + */ + switch (server_status / 100) { + case 1: + /* + * HTTP/1.1 Informational statuses. + * 100 Continue. + * 101 Switching Protocols. + * > 101 is unknown. + * We should never get these, and they have only the status + * line and possibly other headers, so we'll deal with them by + * showing the full header to the user as text/plain. - FM + */ + HTAlert(gettext("Got unexpected Informational Status.")); + do_head = TRUE; + break; + + case 2: + /* + * Good: Got MIME object! (Successful) - FM + */ + if (do_head) { + /* + * If HEAD was requested, show headers (and possibly bogus + * body) for all 2xx status codes as text/plain - KW + */ + HTProgress(line_buffer); + break; + } + switch (server_status) { + case 204: + /* + * No Content. + */ + HTAlert(line_buffer); + HTTP_NETCLOSE(s, handle); + HTNoDataOK = 1; + status = HT_NO_DATA; + goto clean_up; + + case 205: + /* + * Reset Content. The server has fulfilled the request but + * nothing is returned and we should reset any form + * content. We'll instruct the user to do that, and + * restore the current document. - FM + */ + HTAlert(gettext("Request fulfilled. Reset Content.")); + HTTP_NETCLOSE(s, handle); + status = HT_NO_DATA; + goto clean_up; + + case 206: + /* + * Partial Content. We didn't send a Range so something + * went wrong somewhere. Show the status message and + * restore the current document. - FM + */ + HTAlert(line_buffer); + HTTP_NETCLOSE(s, handle); + status = HT_NO_DATA; + goto clean_up; + + default: + /* + * 200 OK. + * 201 Created. 
+ * 202 Accepted. + * 203 Non-Authoritative Information. + * > 206 is unknown. + * All should return something to display. + */ +#if defined(USE_SSL) && !defined(DISABLE_NEWS) + if (do_connect) { + CTRACE((tfp, + "HTTP: Proxy tunnel to '%s' established.\n", + connect_host)); + do_connect = FALSE; + url = connect_url; + FREE(line_buffer); + FREE(line_kept_clean); + if (!StrNCmp(connect_url, "snews", 5)) { + CTRACE((tfp, + " Will attempt handshake and snews connection.\n")); + status = HTNewsProxyConnect(s, url, anAnchor, + format_out, sink); + goto done; + } + did_connect = TRUE; + already_retrying = TRUE; + eol = 0; + length = 0; + doing_redirect = FALSE; + permanent_redirection = FALSE; + target = NULL; + CTRACE((tfp, + " Will attempt handshake and resubmit headers.\n")); + goto use_tunnel; + } +#endif /* USE_SSL */ + HTProgress(line_buffer); + } /* case 2 switch */ + break; + + case 3: + /* + * Various forms of Redirection. - FM + * 300 Multiple Choices. + * 301 Moved Permanently. + * 302 Found (temporary; we can, and do, use GET). + * 303 See Other (temporary; always use GET). + * 304 Not Modified. + * 305 Use Proxy. + * 306 Set Proxy. + * 307 Temporary Redirect with method retained. + * > 308 is unknown. + */ + if (no_url_redirection || do_head || keep_mime_headers) { + /* + * If any of these flags are set, we do not redirect, but + * instead show what was returned to the user as + * text/plain. - FM + */ + HTProgress(line_buffer); + break; + } + + if (server_status == 300) { /* Multiple Choices */ + /* + * For client driven content negotiation. The server + * should be sending some way for the user-agent to make a + * selection, so we'll show the user whatever the server + * returns. 
There might be a Location: header with the + * server's preference present, but the choice should be up + * to the user, someday based on an Alternates: header, + * and a body always should be present with descriptions + * and links for the choices (i.e., we use the latter, for + * now). - FM + */ + HTAlert(line_buffer); + if (traversal) { + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request + * for interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + } + + if (server_status == 304) { /* Not Modified */ + /* + * We didn't send an "If-Modified-Since" header, so this + * status is inappropriate. We'll deal with it by showing + * the full header to the user as text/plain. - FM + */ + HTAlert(gettext("Got unexpected 304 Not Modified status.")); + do_head = TRUE; + break; + } + + if (server_status == 305 || + server_status == 306 || + server_status > 307) { + /* + * Show user the content, if any, for 305, 306, or unknown + * status. - FM + */ + HTAlert(line_buffer); + if (traversal) { + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request + * for interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + } + + /* + * We do not load the file, but read the headers for the + * "Location:", check out that redirecting_url and if it's + * acceptible (e.g., not a telnet URL when we have that + * disabled), initiate a new fetch. If that's another + * redirecting_url, we'll repeat the checks, and fetch + * initiations if acceptible, until we reach the actual URL, or + * the redirection limit set in HTAccess.c is exceeded. 
If the + * status was 301 indicating that the relocation is permanent, + * we set the permanent_redirection flag to make it permanent + * for the current anchor tree (i.e., will persist until the + * tree is freed or the client exits). If the redirection + * would include POST content, we seek confirmation from an + * interactive user, with option to use 303 for 301 (but not + * for 307), and otherwise refuse the redirection. We also + * don't allow permanent redirection if we keep POST content. + * If we don't find the Location header or it's value is + * zero-length, we display whatever the server returned, and + * the user should RELOAD that to try again, or make a + * selection from it if it contains links, or Left-Arrow to the + * previous document. - FM + */ + { + if ((dump_output_immediately || traversal) && + do_post && + server_status != 303 && + server_status != 302 && + server_status != 301) { + /* + * Don't redirect POST content without approval from an + * interactive user. - FM + */ + HTTP_NETCLOSE(s, handle); + status = -1; + HTAlert(gettext("Redirection of POST content requires user approval.")); + if (traversal) + HTProgress(line_buffer); + goto clean_up; + } + + HTProgress(line_buffer); + if (server_status == 301) { /* Moved Permanently */ + if (do_post) { + /* + * Don't make the redirection permanent if we have + * POST content. - FM + */ + CTRACE((tfp, + "HTTP: Have POST content. Treating 301 (Permanent) as Temporary.\n")); + HTAlert(gettext("Have POST content. Treating Permanent Redirection as Temporary.\n")); + } else { + permanent_redirection = TRUE; + } + } + doing_redirect = TRUE; + + break; + } + + case 4: + /* + * "I think I goofed!" (Client Error) - FM + */ + switch (server_status) { + case 401: /* Unauthorized */ + /* + * Authorization for origin server required. If show_401 + * is set, proceed to showing the 401 body. 
Otherwise, if + * we can set up authorization based on the + * WWW-Authenticate header, and the user provides a + * username and password, try again. Otherwise, check + * whether to show the 401 body or restore the current + * document - FM + */ + if (show_401) + break; + if (HTAA_shouldRetryWithAuth(start_of_data, (size_t) + length, s, NO)) { + + HTTP_NETCLOSE(s, handle); + if (dump_output_immediately && !authentication_info[0]) { + fprintf(stderr, + "HTTP: Access authorization required.\n"); + fprintf(stderr, + " Use the -auth=id:pw parameter.\n"); + status = HT_NO_DATA; + goto clean_up; + } + + CTRACE((tfp, "%s %d %s\n", + "HTTP: close socket", s, + "to retry with Access Authorization")); + + _HTProgress(gettext("Retrying with access authorization information.")); + FREE(line_buffer); + FREE(line_kept_clean); +#ifdef USE_SSL + if (using_proxy && !StrNCmp(url, "https://", 8)) { + url = arg; + do_connect = TRUE; + did_connect = FALSE; + } +#endif /* USE_SSL */ + goto try_again; + } else if (!(traversal || dump_output_immediately) && + HTConfirm(gettext("Show the 401 message body?"))) { + break; + } else { + if (traversal || dump_output_immediately) + HTAlert(FAILED_RETRY_WITH_AUTH); + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + + case 407: + /* + * Authorization for proxy server required. If we are not + * in fact using a proxy, or show_407 is set, proceed to + * showing the 407 body. Otherwise, if we can set up + * authorization based on the Proxy-Authenticate header, + * and the user provides a username and password, try + * again. Otherwise, check whether to show the 401 body or + * restore the current document. 
- FM & AJL + */ + if (!using_proxy || show_407) + break; + if (HTAA_shouldRetryWithAuth(start_of_data, (size_t) + length, s, YES)) { + + HTTP_NETCLOSE(s, handle); + if (dump_output_immediately && !proxyauth_info[0]) { + fprintf(stderr, + "HTTP: Proxy authorization required.\n"); + fprintf(stderr, + " Use the -pauth=id:pw parameter.\n"); + status = HT_NO_DATA; + goto clean_up; + } + + CTRACE((tfp, "%s %d %s\n", + "HTTP: close socket", s, + "to retry with Proxy Authorization")); + + _HTProgress(HTTP_RETRY_WITH_PROXY); + FREE(line_buffer); + FREE(line_kept_clean); + goto try_again; + } else if (!(traversal || dump_output_immediately) && + HTConfirm(gettext("Show the 407 message body?"))) { + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation + * request for interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + } else { + if (traversal || dump_output_immediately) + HTAlert(FAILED_RETRY_WITH_PROXY); + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + + case 408: + /* + * Request Timeout. Show the status message and restore + * the current document. - FM + */ + HTAlert(line_buffer); + HTTP_NETCLOSE(s, handle); + status = HT_NO_DATA; + goto done; + + default: + /* + * 400 Bad Request. + * 402 Payment Required. + * 403 Forbidden. + * 404 Not Found. + * 405 Method Not Allowed. + * 406 Not Acceptable. + * 409 Conflict. + * 410 Gone. + * 411 Length Required. + * 412 Precondition Failed. + * 413 Request Entity Too Large. + * 414 Request-URI Too Long. + * 415 Unsupported Media Type. + * 416 List Response (for content negotiation). + * > 416 is unknown. + * Show the status message, and display the returned text + * if we are not doing a traversal. 
- FM + */ + HTAlert(line_buffer); + if (traversal) { + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request + * for interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + } /* case 4 switch */ + break; + + case 5: + /* + * "I think YOU goofed!" (server error) + * 500 Internal Server Error + * 501 Not Implemented + * 502 Bad Gateway + * 503 Service Unavailable + * 504 Gateway Timeout + * 505 HTTP Version Not Supported + * > 505 is unknown. + * Should always include a message, which we always should + * display. - FM + */ + HTAlert(line_buffer); + if (traversal) { + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request for + * interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + + default: + /* + * Bad or unknown server_status number. Take a chance and hope + * there is something to display. - FM + */ + HTAlert(gettext("Unknown status reply from server!")); + HTAlert(line_buffer); + if (traversal) { + HTTP_NETCLOSE(s, handle); + status = -1; + goto clean_up; + } + if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request for + * interactive users. - FM + */ + format_out = WWW_PRESENT; + } + break; + } /* Switch on server_status/100 */ + + } /* Full HTTP reply */ + } /* scope of fields */ + + /* + * The user may have pressed the 'z'ap key during the pause caused by one + * of the HTAlerts above if the server reported an error, to skip loading + * of the error response page. 
Checking here before setting up the stream + * stack and feeding it data avoids doing unnecessary work, it also can + * avoid unnecessarily pushing a loaded document out of the cache to make + * room for the unwanted error page. - kw + */ + if (HTCheckForInterrupt()) { + HTTP_NETCLOSE(s, handle); + if (doing_redirect) { + /* + * Impatient user. - FM + */ + CTRACE((tfp, "HTTP: Interrupted followup read.\n")); + _HTProgress(CONNECTION_INTERRUPTED); + } + status = HT_INTERRUPTED; + goto clean_up; + } + /* + * Set up the stream stack to handle the body of the message. + */ + if (do_head || keep_mime_headers) { + /* + * It was a HEAD request, or we want the headers and source. + */ + start_of_data = line_kept_clean; +#ifdef SH_EX /* FIX BUG by kaz@maczuka.hitachi.ibaraki.jp */ +/* GIF file contains \0, so strlen does not return the data length */ + length = real_length_of_line; +#else + length = rawlength; +#endif + format_in = HTAtom_for("text/plain"); + + } else if (doing_redirect) { + + format_in = HTAtom_for("message/x-http-redirection"); + StrAllocCopy(anAnchor->content_type, HTAtom_name(format_in)); + if (traversal) { + format_out = WWW_DEBUG; + if (!sink) + sink = HTErrorStream(); + } else if (!dump_output_immediately && + format_out == HTAtom_for("www/download")) { + /* + * Convert a download request to a presentation request for + * interactive users. - FM + */ + format_out = WWW_PRESENT; + } + } + + target = HTStreamStack(format_in, + format_out, + sink, anAnchor); + + if (target == NULL) { + char *buffer = NULL; + + HTTP_NETCLOSE(s, handle); + HTSprintf0(&buffer, CANNOT_CONVERT_I_TO_O, + HTAtom_name(format_in), HTAtom_name(format_out)); + _HTProgress(buffer); + FREE(buffer); + status = -1; + goto clean_up; + } + + /* + * Recycle the first chunk of data, in all cases. + */ + (*target->isa->put_block) (target, start_of_data, length); + + /* + * Go pull the bulk of the data down. 
+ */ + rv = HTCopy(anAnchor, s, (void *) handle, target); + + /* + * If we get here with doing_redirect set, it means that we were looking + * for a Location header. We either have got it now in redirecting_url - + * in that case the stream should not have loaded any data. Or we didn't + * get it, in that case the stream may have presented the message body + * normally. - kw + */ + + if (rv == -1) { + /* + * Intentional interrupt before data were received, not an error + */ + if (doing_redirect && traversal) + status = -1; + else + status = HT_INTERRUPTED; + HTTP_NETCLOSE(s, handle); + goto clean_up; + } + + if (rv == -2) { + /* + * Aw hell, a REAL error, maybe cuz it's a dumb HTTP0 server + */ + (*target->isa->_abort) (target, NULL); + if (doing_redirect && redirecting_url) { + /* + * Got a location before the error occurred? Then consider it an + * interrupt but proceed below as normal. - kw + */ + /* do nothing here */ + } else { + HTTP_NETCLOSE(s, handle); + if (!doing_redirect && !already_retrying && !do_post) { + CTRACE((tfp, "HTTP: Trying again with HTTP0 request.\n")); + /* + * May as well consider it an interrupt -- right? + */ + FREE(line_buffer); + FREE(line_kept_clean); + extensions = NO; + already_retrying = TRUE; + _HTProgress(RETRYING_AS_HTTP0); + goto try_again; + } else { + status = HT_NOT_LOADED; + goto clean_up; + } + } + } + + /* + * Free if complete transmission (socket was closed before return). Close + * socket if partial transmission (was freed on abort). + */ + if (rv != HT_INTERRUPTED && rv != -2) { + (*target->isa->_free) (target); + } else { + HTTP_NETCLOSE(s, handle); + } + + if (doing_redirect) { + if (redirecting_url) { + /* + * Set up for checking redirecting_url in LYGetFile.c for + * restrictions before we seek the document at that Location. 
- FM + */ + CTRACE((tfp, "HTTP: Picked up location '%s'\n", + redirecting_url)); + if (rv == HT_INTERRUPTED) { + /* + * Intentional interrupt after data were received, not an error + * (probably). We take it as a user request to abandon the + * redirection chain. + * + * This could reasonably be changed (by just removing this + * block), it would make sense if there are redirecting + * resources that "hang" after sending the headers. - kw + */ + FREE(redirecting_url); + CTRACE((tfp, "HTTP: Interrupted followup read.\n")); + status = HT_INTERRUPTED; + goto clean_up; + } + HTProgress(line_buffer); + if (server_status == 305) { /* Use Proxy */ + /* + * Make sure the proxy field ends with a slash. - FM + */ + if (redirecting_url[strlen(redirecting_url) - 1] + != '/') + StrAllocCat(redirecting_url, "/"); + /* + * Append our URL. - FM + */ + StrAllocCat(redirecting_url, anAnchor->address); + CTRACE((tfp, "HTTP: Proxy URL is '%s'\n", + redirecting_url)); + } + if (!do_post || + server_status == 303 || + server_status == 302) { + /* + * We don't have POST content (nor support PUT or DELETE), or + * the status is "See Other" or "General Redirection" and we + * can convert to GET, so go back and check out the new URL. - + * FM + */ + status = HT_REDIRECTING; + goto clean_up; + } + /* + * Make sure the user wants to redirect the POST content, or treat + * as GET - FM & DK + */ + switch (HTConfirmPostRedirect(redirecting_url, + server_status)) { + /* + * User failed to confirm. Abort the fetch. + */ + case 0: + FREE(redirecting_url); + status = HT_NO_DATA; + goto clean_up; + + /* + * User wants to treat as GET with no content. Go back to + * check out the URL. + */ + case 303: + break; + + /* + * Set the flag to retain the POST content and go back to check + * out the URL. 
- FM + */ + default: + redirect_post_content = TRUE; + } + + /* Lou's old comment: - FM */ + /* OK, now we've got the redirection URL temporarily stored + in external variable redirecting_url, exported from HTMIME.c, + since there's no straightforward way to do this in the library + currently. Do the right thing. */ + + status = HT_REDIRECTING; + + } else { + status = traversal ? -1 : HT_LOADED; + } + + } else { + /* + * If any data were received, treat as a complete transmission + */ + status = HT_LOADED; + } + + /* + * Clean up + */ + clean_up: + FREE(line_buffer); + FREE(line_kept_clean); + + done: + /* + * Clear out on exit, just in case. + */ + reloading = FALSE; +#ifdef USE_SSL + FREE(connect_host); + if (handle) { + SSL_free(handle); + SSL_handle = handle = NULL; + } +#endif /* USE_SSL */ + dump_server_status = server_status; + return status; +} + +/* Protocol descriptors: the http and https entries both pair their scheme name with HTLoadHTTP +*/ +#ifdef GLOBALDEF_IS_MACRO +#define _HTTP_C_GLOBALDEF_1_INIT { "http", HTLoadHTTP, 0} +GLOBALDEF(HTProtocol, HTTP, _HTTP_C_GLOBALDEF_1_INIT); +#define _HTTP_C_GLOBALDEF_2_INIT { "https", HTLoadHTTP, 0} +GLOBALDEF(HTProtocol, HTTPS, _HTTP_C_GLOBALDEF_2_INIT); +#else +GLOBALDEF HTProtocol HTTP = +{"http", HTLoadHTTP, 0}; +GLOBALDEF HTProtocol HTTPS = +{"https", HTLoadHTTP, 0}; +#endif /* GLOBALDEF_IS_MACRO */ diff --git a/WWW/Library/Implementation/HTTP.h b/WWW/Library/Implementation/HTTP.h new file mode 100644 index 00000000..f10c2fce --- /dev/null +++ b/WWW/Library/Implementation/HTTP.h @@ -0,0 +1,41 @@ +/* + * $LynxId: HTTP.h,v 1.10 2009/04/07 22:44:50 tom Exp $ + * + * /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTTP.html + * HYPERTEXT TRANSFER PROTOCOL + */ +#ifndef HTTP_H +#define HTTP_H + +#include <HTAccess.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTTP); + extern GLOBALREF (HTProtocol, HTTPS); + +#else + GLOBALREF HTProtocol HTTP; + GLOBALREF HTProtocol HTTPS; +#endif /* GLOBALREF_IS_MACRO */ + 
+#define URL_GET_METHOD 1 +#define URL_POST_METHOD 2 +#define URL_MAIL_METHOD 3 + + extern int ws_read_per_sec; + extern BOOLEAN reloading; /* HTLoadHTTP() resets this to FALSE on exit */ + extern char *redirecting_url; /* Location picked up for a redirect; HTLoadHTTP() FREEs it when the redirect is abandoned */ + extern BOOL permanent_redirection; /* HTLoadHTTP() sets this TRUE for a 301 reply without POST content */ + extern BOOL redirect_post_content; /* HTLoadHTTP() sets this TRUE when the user confirms keeping POST content across a redirect */ + +#ifdef USE_SSL + extern SSL *SSL_handle; /* HTLoadHTTP() sets this to NULL after SSL_free()ing its handle */ +#endif + +#ifdef __cplusplus +} +#endif +#endif /* HTTP_H */ diff --git a/WWW/Library/Implementation/HTTelnet.c b/WWW/Library/Implementation/HTTelnet.c new file mode 100644 index 00000000..f525d5c7 --- /dev/null +++ b/WWW/Library/Implementation/HTTelnet.c @@ -0,0 +1,553 @@ +/* + * $LynxId: HTTelnet.c,v 1.39 2009/11/21 17:05:33 Bela.Lubkin Exp $ + * + * Telnet Access, Rlogin, etc HTTelnet.c + * ========================== + * + * Authors + * TBL Tim Berners-Lee timbl@info.cern.ch + * JFG Jean-Francois Groff jgh@next.com + * DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu> + * History + * 8 Jun 92 Telnet hopping prohibited as telnet is not secure (TBL) + * 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. (JFG) + * 6 Oct 92 Moved HTClientHost and logfile into here. (TBL) + * 17 Dec 92 Tn3270 added, bug fix. (DD) + * 2 Feb 93 Split from HTAccess.c. 
Registration.(TBL) + */ + +#include <HTUtils.h> +#include <LYUtils.h> + +/* Implements: +*/ +#include <HTTelnet.h> + +#include <HTParse.h> +#include <HTAnchor.h> +#include <HTTP.h> +#include <HTFile.h> + +#include <HTTCP.h> +#include <HText.h> + +#include <HTAccess.h> +#include <HTAlert.h> + +#include <LYStrings.h> +#include <LYClean.h> +#include <LYLeaks.h> + +#ifdef __GNUC__ +static void do_system(char *) GCC_UNUSED; /* NOTE(review): presumably silences unused-function warnings in builds whose platform branch never calls do_system() - confirm */ +#endif +/* Trace and run command through LYSystem() when non_empty(command) holds; FREE()s command in every case, so the caller hands over ownership. */ +static void do_system(char *command) +{ + if (non_empty(command)) { + CTRACE((tfp, "HTTelnet: Command is: %s\n\n", command)); + LYSystem(command); + } + FREE(command); +} + +/* Telnet or "rlogin" access + * ------------------------- + */ +static int remote_session(char *acc_method, char *host) +{ + const char *program; + char *user = host; + char *password = NULL; + char *cp; + char *hostname; + char *port; + char *command = NULL; + enum _login_protocol { + telnet, + rlogin, + tn3270 + } login_protocol = + strcmp(acc_method, "rlogin") == 0 ? rlogin : + strcmp(acc_method, "tn3270") == 0 ? tn3270 : telnet; + + /* + * Modified to allow for odd chars in a username only if exists. + * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + */ + /* prevent telnet://hostname;rm -rf * URL's (VERY BAD) + * *cp=0; // terminate at any ;,<,>,`,|,",' or space or return + * or tab to prevent security hole + */ + for (cp = (strchr(host, '@') ? strchr(host, '@') : host); *cp != '\0'; + cp++) { + if (!isalnum(UCH(*cp)) && *cp != '_' && *cp != '-' && + *cp != ':' && *cp != '.' 
&& *cp != '@') { + *cp = '\0'; + break; + } + } + + hostname = strchr(host, '@'); + + if (hostname) { + *hostname++ = '\0'; /* Split */ + } else { + hostname = host; + user = NULL; /* No user specified */ + } + + port = strchr(hostname, ':'); + if (port) + *port++ = '\0'; /* Split */ + + if (!hostname || *hostname == '\0') { + CTRACE((tfp, "HTTelnet: No host specified!\n")); + return HT_NO_DATA; + } else if (!valid_hostname(hostname)) { + char *prefix = NULL; + char *line = NULL; + + CTRACE((tfp, "HTTelnet: Invalid hostname %s!\n", host)); + HTSprintf0(&prefix, + gettext("remote %s session:"), acc_method); + HTSprintf0(&line, + gettext("Invalid hostname %s"), host); + HTAlwaysAlert(prefix, line); + FREE(prefix); + FREE(line); + return HT_NO_DATA; + } + + if (user) { + password = strchr(user, ':'); + if (password) { + *password++ = '\0'; + } + } + + /* If the person is already telnetting etc, forbid hopping */ + /* This is a security precaution, for us and remote site */ + + if (HTSecure) { + +#ifdef TELNETHOPPER_MAIL + HTSprintf0(&command, + "finger @%s | mail -s \"**telnethopper %s\" tbl@dxcern.cern.ch", + HTClientHost, HTClientHost); + do_system(command); +#endif + printf("\n\nSorry, but the service you have selected is one\n"); + printf("to which you have to log in. If you were running www\n"); + printf("on your own computer, you would be automatically connected.\n"); + printf("For security reasons, this is not allowed when\n"); + printf("you log in to this information service remotely.\n\n"); + + printf("You can manually connect to this service using %s\n", + acc_method); + printf("to host %s", hostname); + if (user) + printf(", user name %s", user); + if (password) + printf(", password %s", password); + if (port) + printf(", port %s", port); + printf(".\n\n"); + return HT_NO_DATA; + } + + /* Not all telnet servers get it even if user name is specified so we + * always tell the guy what to log in as. 
+ */ + if (user && login_protocol != rlogin) + printf("When you are connected, log in as: %s\n", user); + if (password && login_protocol != rlogin) + printf(" The password is: %s\n", password); + fflush(stdout); + +/* + * NeXTSTEP is the implied version of the NeXT operating system. + * You may need to define this yourself. + */ +#if !defined(TELNET_DONE) && (defined(NeXT) && defined(NeXTSTEP) && NeXTSTEP<=20100) +#define FMT_TELNET "%s%s%s %s %s" + + if ((program = HTGetProgramPath(ppTELNET)) != NULL) { + HTAddParam(&command, FMT_TELNET, 1, program); + HTOptParam(&command, FMT_TELNET, 2, user ? " -l " : ""); + HTAddParam(&command, FMT_TELNET, 3, user); + HTAddParam(&command, FMT_TELNET, 4, hostname); + HTAddParam(&command, FMT_TELNET, 5, port); + HTEndParam(&command, FMT_TELNET, 5); + } + do_system(command); +#define TELNET_DONE +#endif + +/* Most unix machines support username only with rlogin */ +#if !defined(TELNET_DONE) && (defined(UNIX) || defined(DOSPATH) || defined(__CYGWIN__)) + +#define FMT_RLOGIN "%s %s%s%s" +#define FMT_TN3270 "%s %s %s" +#define FMT_TELNET "%s %s %s" + + switch (login_protocol) { + case rlogin: + if ((program = HTGetProgramPath(ppRLOGIN)) != NULL) { + HTAddParam(&command, FMT_RLOGIN, 1, program); + HTAddParam(&command, FMT_RLOGIN, 2, hostname); + HTOptParam(&command, FMT_RLOGIN, 3, user ? 
" -l " : ""); + HTAddParam(&command, FMT_RLOGIN, 4, user); + HTEndParam(&command, FMT_RLOGIN, 4); + } + break; + + case tn3270: + if ((program = HTGetProgramPath(ppTN3270)) != NULL) { + HTAddParam(&command, FMT_TN3270, 1, program); + HTAddParam(&command, FMT_TN3270, 2, hostname); + HTAddParam(&command, FMT_TN3270, 3, port); + HTEndParam(&command, FMT_TN3270, 3); + } + break; + + case telnet: + if ((program = HTGetProgramPath(ppTELNET)) != NULL) { + HTAddParam(&command, FMT_TELNET, 1, program); + HTAddParam(&command, FMT_TELNET, 2, hostname); + HTAddParam(&command, FMT_TELNET, 3, port); + HTEndParam(&command, FMT_TELNET, 3); + } + break; + } + + LYSystem(command); +#define TELNET_DONE +#endif /* unix */ + +/* VMS varieties */ +#if !defined(TELNET_DONE) && (defined(MULTINET)) + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN%s%s%s%s%s %s", /*lm 930713 */ + user ? "/USERNAME=\"" : "", + NonNull(user), + user ? "\"" : "", + port ? "/PORT=" : "", + NonNull(port), + hostname); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TELNET/TN3270 %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + } + + do_system(command); +#define TELNET_DONE +#endif /* MULTINET */ + +#if !defined(TELNET_DONE) && defined(WIN_TCP) + if ((cp = getenv("WINTCP_COMMAND_STYLE")) != NULL && + 0 == strncasecomp(cp, "VMS", 3)) { /* VMS command syntax */ + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN%s%s%s%s%s %s", /*lm 930713 */ + user ? "/USERNAME=\"" : "", + NonNull(user), + user ? "\"" : "", + port ? "/PORT=" : "", + NonNull(port), + hostname); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TELNET/TN3270 %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s%s %s", + port ? 
"/PORT=" : "", + NonNull(port), + hostname); + } + + } else { /* UNIX command syntax */ + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN %s%s%s%s%s", + hostname, + user ? " -l " : "", + user ? "\"" : "", + NonNull(user), + user ? "\"" : ""); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TN3270 %s %s", + hostname, + NonNull(port)); + + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s %s", + hostname, + NonNull(port)); + } + } + + do_system(command); +#define TELNET_DONE +#endif /* WIN_TCP */ + +#if !defined(TELNET_DONE) && defined(UCX) + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN%s%s%s %s %s", + user ? "/USERNAME=\"" : "", + NonNull(user), + user ? "\"" : "", + hostname, + NonNull(port)); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TN3270 %s %s", + hostname, + NonNull(port)); + + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s %s", + hostname, + NonNull(port)); + } + + do_system(command); +#define TELNET_DONE +#endif /* UCX */ + +#if !defined(TELNET_DONE) && defined(CMU_TCP) + if (login_protocol == telnet) { + HTSprintf0(&command, "TELNET %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + do_system(command); + } else { + printf("\nSorry, this browser was compiled without the %s access option.\n", + acc_method); + printf("\nPress <return> to return to Lynx."); + LYgetch(); + HadVMSInterrupt = FALSE; + } +#define TELNET_DONE +#endif /* CMU_TCP */ + +#if !defined(TELNET_DONE) && defined(SOCKETSHR_TCP) + if (getenv("MULTINET_SOCKET_LIBRARY") != NULL) { + if (login_protocol == rlogin) { + HTSprintf0(&command, "MULTINET RLOGIN%s%s%s%s %s", /*lm 930713 */ + user ? "/USERNAME=" : "", + NonNull(user), + port ? "/PORT=" : "", + NonNull(port), + hostname); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "MULTINET TELNET/TN3270 %s%s %s", + port ? 
"/PORT=" : "", + NonNull(port), + hostname); + + } else { /* TELNET */ + HTSprintf0(&command, "MULTINET TELNET %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + } + + do_system(command); + return HT_NO_DATA; /* Ok - it was done but no data */ + } else if ((cp = getenv("WINTCP_COMMAND_STYLE")) != NULL) { + if (0 == strncasecomp(cp, "VMS", 3)) { /* VMS command syntax */ + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN%s%s%s%s %s", /*lm 930713 */ + user ? "/USERNAME=" : "", + NonNull(user), + port ? "/PORT=" : "", + NonNull(port), + hostname); + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TELNET/TN3270 %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + } + } else { /* UNIX command syntax */ + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN %s%s%s", + hostname, + user ? " -l " : "", + NonNull(user)); + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TN3270 %s %s", + hostname, + NonNull(port)); + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s %s", + hostname, + NonNull(port)); + } + } + + do_system(command); + return HT_NO_DATA; /* Ok - it was done but no data */ + } else if (getenv("UCX$DEVICE") != NULL + || getenv("TCPIP$DEVICE") != NULL) { + if (login_protocol == rlogin) { + HTSprintf0(&command, "RLOGIN%s%s %s %s", + user ? "/USERNAME=" : "", + NonNull(user), + hostname, + NonNull(port)); + + } else if (login_protocol == tn3270) { + HTSprintf0(&command, "TN3270 %s %s", + hostname, + NonNull(port)); + + } else { /* TELNET */ + HTSprintf0(&command, "TELNET %s %s", + hostname, + NonNull(port)); + } + + do_system(command); + return HT_NO_DATA; /* Ok - it was done but no data */ + } else if (getenv("CMUTEK_ROOT") != NULL) { + if (login_protocol == telnet) { + HTSprintf0(&command, "TELNET %s%s %s", + port ? 
"/PORT=" : "", + NonNull(port), + hostname); + do_system(command); + } else { + printf("\nSorry, this browser was compiled without the %s access option.\n", + acc_method); + printf("\nPress <return> to return to Lynx."); + LYgetch(); + HadVMSInterrupt = FALSE; + } + } else { + if (login_protocol == telnet) { + HTSprintf0(&command, "TELNET %s%s %s", + port ? "/PORT=" : "", + NonNull(port), + hostname); + do_system(command); + } else { + printf("\nSorry, this browser was compiled without the %s access option.\n", + acc_method); + printf("\nPress <return> to return to Lynx."); + LYgetch(); + HadVMSInterrupt = FALSE; + } + } +#define TELNET_DONE +#endif /* SOCKETSHR_TCP */ + +#if !defined(TELNET_DONE) && (defined(SIMPLE_TELNET) || defined(VM)) + if (login_protocol == telnet) { /* telnet only */ + HTSprintf0(&command, "TELNET %s", /* @@ Bug: port ignored */ + hostname); + do_system(command); + return HT_NO_DATA; /* Ok - it was done but no data */ + } +#define TELNET_DONE +#endif + +#ifndef TELNET_DONE + printf("\nSorry, this browser was compiled without the %s access option.\n", + acc_method); + printf("\nTo access the information you must %s to %s", acc_method, hostname); + if (port) + printf(" (port %s)", port); + if (user) + printf("\nlogging in with username %s", user); + printf(".\n"); + { + printf("\nPress <return> to return to Lynx."); + fflush(stdout); + LYgetch(); +#ifdef VMS + HadVMSInterrupt = FALSE; +#endif /* VMS */ + } +#endif /* !TELNET_DONE */ + return HT_NO_DATA; +} + +/* "Load a document" -- establishes a session + * ------------------------------------------ + * + * On entry, + * addr must point to the fully qualified hypertext reference. + * + * On exit, + * returns <0 Error has occurred. + * >=0 Value of file descriptor or socket to be used + * to read data. + * *pFormat Set to the format of the file, if known. 
+ * (See WWW.h) + * + */ +static int HTLoadTelnet(const char *addr, + HTParentAnchor *anchor GCC_UNUSED, + HTFormat format_out GCC_UNUSED, + HTStream *sink) /* Ignored */ +{ + char *acc_method; + char *host; + int status; + + if (sink) { + CTRACE((tfp, + "HTTelnet: Can't output a live session -- must be interactive!\n")); + return HT_NO_DATA; + } + acc_method = HTParse(addr, STR_FILE_URL, PARSE_ACCESS); + + host = HTParse(addr, "", PARSE_HOST); + if (!host || *host == '\0') { + status = HT_NO_DATA; + CTRACE((tfp, "HTTelnet: No host specified!\n")); + } else { + status = remote_session(acc_method, host); + } + + FREE(host); + FREE(acc_method); + return status; +} + +#ifdef GLOBALDEF_IS_MACRO +#define _HTTELNET_C_1_INIT { "telnet", HTLoadTelnet, NULL } +#define _HTTELNET_C_2_INIT { "rlogin", HTLoadTelnet, NULL } +#define _HTTELNET_C_3_INIT { "tn3270", HTLoadTelnet, NULL } +GLOBALDEF(HTProtocol, HTTelnet, _HTTELNET_C_1_INIT); +GLOBALDEF(HTProtocol, HTRlogin, _HTTELNET_C_2_INIT); +GLOBALDEF(HTProtocol, HTTn3270, _HTTELNET_C_3_INIT); +#else +GLOBALDEF HTProtocol HTTelnet = +{"telnet", HTLoadTelnet, NULL}; +GLOBALDEF HTProtocol HTRlogin = +{"rlogin", HTLoadTelnet, NULL}; +GLOBALDEF HTProtocol HTTn3270 = +{"tn3270", HTLoadTelnet, NULL}; +#endif /* GLOBALDEF_IS_MACRO */ diff --git a/WWW/Library/Implementation/HTTelnet.h b/WWW/Library/Implementation/HTTelnet.h new file mode 100644 index 00000000..f2208d88 --- /dev/null +++ b/WWW/Library/Implementation/HTTelnet.h @@ -0,0 +1,28 @@ +/* /Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/HTTelnet.html + TELNET AND SIMILAR ACCESS METHODS + + */ + +#ifndef HTTELNET_H +#define HTTELNET_H + +#include <HTAccess.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTTelnet); + extern GLOBALREF (HTProtocol, HTRlogin); + extern GLOBALREF (HTProtocol, HTTn3270); + +#else + GLOBALREF HTProtocol HTTelnet; + GLOBALREF HTProtocol HTRlogin; + GLOBALREF HTProtocol HTTn3270; +#endif 
/* GLOBALREF_IS_MACRO */ + +#ifdef __cplusplus +} +#endif +#endif /* HTTELNET_H */ diff --git a/WWW/Library/Implementation/HTUU.c b/WWW/Library/Implementation/HTUU.c new file mode 100644 index 00000000..4848cce5 --- /dev/null +++ b/WWW/Library/Implementation/HTUU.c @@ -0,0 +1,210 @@ +/* + * $LynxId: HTUU.c,v 1.11 2010/09/21 23:55:12 tom Exp $ + * + * MODULE HTUU.c + * UUENCODE AND UUDECODE + * + * ACKNOWLEDGEMENT: + * This code is taken from rpem distribution, and was originally + * written by Mark Riordan. + * + * AUTHORS: + * MR Mark Riordan riordanmr@clvax1.cl.msu.edu + * AL Ari Luotonen luotonen@dxcern.cern.ch + * + * HISTORY: + * Added as part of the WWW library and edited to conform + * with the WWW project coding standards by: AL 5 Aug 1993 + * Originally written by: MR 12 Aug 1990 + * Original header text: + * ------------------------------------------------------------- + * File containing routines to convert a buffer + * of bytes to/from RFC 1113 printable encoding format. + * + * This technique is similar to the familiar Unix uuencode + * format in that it maps 6 binary bits to one ASCII + * character (or more aptly, 3 binary bytes to 4 ASCII + * characters). However, RFC 1113 does not use the same + * mapping to printable characters as uuencode. + * + * Mark Riordan 12 August 1990 and 17 Feb 1991. + * This code is hereby placed in the public domain. 
+ * ------------------------------------------------------------- + */ + +#include <HTUtils.h> +#include <HTUU.h> + +#include <LYLeaks.h> + +static char six2pr[64] = +{ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' +}; + +static unsigned char pr2six[256]; + +/*--- function HTUU_encode ----------------------------------------------- + * + * Encode a single line of binary data to a standard format that + * uses only printing ASCII characters (but takes up 33% more bytes). + * + * Entry bufin points to a buffer of bytes. If nbytes is not + * a multiple of three, then the byte just beyond + * the last byte in the buffer must be 0. + * nbytes is the number of bytes in that buffer. + * This cannot be more than 48. + * bufcoded points to an output buffer. Be sure that this + * can hold at least 1 + (4*nbytes)/3 characters. + * + * Exit bufcoded contains the coded line. The first 4*nbytes/3 bytes + * contain printing ASCII characters representing + * those binary bytes. This may include one or + * two '=' characters used as padding at the end. + * The last byte is a zero byte. + * Returns the number of ASCII characters in "bufcoded". 
+ */ +int HTUU_encode(unsigned char *bufin, + size_t nbytes, + char *bufcoded) +{ +/* ENC is the basic 1 character encoding function to make a char printing */ +#define ENC(c) six2pr[c] + + register char *outptr = bufcoded; + size_t i; + + /* This doesn't seem to be needed (AL): register unsigned char *inptr = bufin; */ + + for (i = 0; i < nbytes; i += 3) { + *(outptr++) = ENC(*bufin >> 2); /* c1 */ + *(outptr++) = ENC(((*bufin << 4) & 060) | ((bufin[1] >> 4) & 017)); /*c2 */ + *(outptr++) = ENC(((bufin[1] << 2) & 074) | ((bufin[2] >> 6) & 03)); /*c3 */ + *(outptr++) = ENC(bufin[2] & 077); /* c4 */ + + bufin += 3; + } + + /* If nbytes was not a multiple of 3, then we have encoded too + * many characters. Adjust appropriately. + */ + if (i == nbytes + 1) { + /* There were only 2 bytes in that last group */ + outptr[-1] = '='; + } else if (i == nbytes + 2) { + /* There was only 1 byte in that last group */ + outptr[-1] = '='; + outptr[-2] = '='; + } + *outptr = '\0'; + return (int) (outptr - bufcoded); +} + +/*--- function HTUU_decode ------------------------------------------------ + * + * Decode an ASCII-encoded buffer back to its original binary form. + * + * Entry bufcoded points to a uuencoded string. It is + * terminated by any character not in + * the printable character table six2pr, but + * leading whitespace is stripped. + * bufplain points to the output buffer; must be big + * enough to hold the decoded string (generally + * shorter than the encoded string) plus + * as many as two extra bytes used during + * the decoding process. + * outbufsize is the maximum number of bytes that + * can fit in bufplain. + * + * Exit Returns the number of binary bytes decoded. + * bufplain contains these bytes. 
+ */ +int HTUU_decode(char *bufcoded, + unsigned char *bufplain, + int outbufsize) +{ +/* single character decode */ +#define DEC(c) pr2six[(int)c] +#define MAXVAL 63 + + static int first = 1; + + int nbytesdecoded, j; + register char *bufin; + register unsigned char *bufout = bufplain; + register int nprbytes; + + /* If this is the first call, initialize the mapping table. + * This code should work even on non-ASCII machines. + */ + if (first) { + first = 0; + for (j = 0; j < 256; j++) + pr2six[j] = MAXVAL + 1; + + for (j = 0; j < 64; j++) + pr2six[UCH(six2pr[j])] = UCH(j); +#if 0 + /* *INDENT-OFF* */ + pr2six['A']= 0; pr2six['B']= 1; pr2six['C']= 2; pr2six['D']= 3; + pr2six['E']= 4; pr2six['F']= 5; pr2six['G']= 6; pr2six['H']= 7; + pr2six['I']= 8; pr2six['J']= 9; pr2six['K']=10; pr2six['L']=11; + pr2six['M']=12; pr2six['N']=13; pr2six['O']=14; pr2six['P']=15; + pr2six['Q']=16; pr2six['R']=17; pr2six['S']=18; pr2six['T']=19; + pr2six['U']=20; pr2six['V']=21; pr2six['W']=22; pr2six['X']=23; + pr2six['Y']=24; pr2six['Z']=25; pr2six['a']=26; pr2six['b']=27; + pr2six['c']=28; pr2six['d']=29; pr2six['e']=30; pr2six['f']=31; + pr2six['g']=32; pr2six['h']=33; pr2six['i']=34; pr2six['j']=35; + pr2six['k']=36; pr2six['l']=37; pr2six['m']=38; pr2six['n']=39; + pr2six['o']=40; pr2six['p']=41; pr2six['q']=42; pr2six['r']=43; + pr2six['s']=44; pr2six['t']=45; pr2six['u']=46; pr2six['v']=47; + pr2six['w']=48; pr2six['x']=49; pr2six['y']=50; pr2six['z']=51; + pr2six['0']=52; pr2six['1']=53; pr2six['2']=54; pr2six['3']=55; + pr2six['4']=56; pr2six['5']=57; pr2six['6']=58; pr2six['7']=59; + pr2six['8']=60; pr2six['9']=61; pr2six['+']=62; pr2six['/']=63; + /* *INDENT-ON* */ + +#endif + } + + /* Strip leading whitespace. */ + + while (*bufcoded == ' ' || *bufcoded == '\t') + bufcoded++; + + /* Figure out how many characters are in the input buffer. + * If this would decode into more bytes than would fit into + * the output buffer, adjust the number of input bytes downwards. 
+ */ + bufin = bufcoded; + while (pr2six[UCH(*(bufin++))] <= MAXVAL) ; + nprbytes = (int) (bufin - bufcoded - 1); + nbytesdecoded = ((nprbytes + 3) / 4) * 3; + if (nbytesdecoded > outbufsize) { + nprbytes = (outbufsize * 4) / 3; + } + + bufin = bufcoded; + + while (nprbytes > 0) { + *(bufout++) = UCH((DEC(bufin[0]) << 2) | (DEC(bufin[1]) >> 4)); + *(bufout++) = UCH((DEC(bufin[1]) << 4) | (DEC(bufin[2]) >> 2)); + *(bufout++) = UCH((DEC(bufin[2]) << 6) | (DEC(bufin[3]))); + bufin += 4; + nprbytes -= 4; + } + + if (nprbytes & 03) { + if (pr2six[(int) bufin[-2]] > MAXVAL) { + nbytesdecoded -= 2; + } else { + nbytesdecoded -= 1; + } + } + + return (nbytesdecoded); +} diff --git a/WWW/Library/Implementation/HTUU.h b/WWW/Library/Implementation/HTUU.h new file mode 100644 index 00000000..93ba4357 --- /dev/null +++ b/WWW/Library/Implementation/HTUU.h @@ -0,0 +1,36 @@ +/* + * $LynxId: HTUU.h,v 1.11 2010/09/25 11:43:27 tom Exp $ + * + * ENCODING TO PRINTABLE CHARACTERS + + File module provides functions HTUU_encode() and HTUU_decode() which convert + a buffer of bytes to/from RFC 1113 printable encoding format. This + technique is similar to the familiar Unix uuencode format in that it maps 6 + binary bits to one ASCII character (or more aptly, 3 binary bytes to 4 ASCII + characters). However, RFC 1113 does not use the same mapping to printable + characters as uuencode. 
+ + */ + +#ifndef HTUU_H +#define HTUU_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + extern int HTUU_encode(unsigned char *bufin, + size_t nbytes, + char *bufcoded); + + extern int HTUU_decode(char *bufcoded, + unsigned char *bufplain, + int outbufsize); + +#ifdef __cplusplus +} +#endif +#endif /* HTUU_H */ diff --git a/WWW/Library/Implementation/HTUtils.h b/WWW/Library/Implementation/HTUtils.h new file mode 100644 index 00000000..5df9c620 --- /dev/null +++ b/WWW/Library/Implementation/HTUtils.h @@ -0,0 +1,790 @@ +/* + * $LynxId: HTUtils.h,v 1.109 2012/02/10 18:44:24 tom Exp $ + * + * Utility macros for the W3 code library + * MACROS FOR GENERAL USE + * + * See also: the system dependent file "www_tcp.h", which is included here. + */ + +#ifndef NO_LYNX_TRACE +#define DEBUG /* Turns on trace; turn off for smaller binary */ +#endif + +#ifndef HTUTILS_H +#define HTUTILS_H + +#ifdef HAVE_CONFIG_H +#include <lynx_cfg.h> /* generated by autoconf 'configure' script */ + +/* see AC_FUNC_ALLOCA macro */ +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX +#pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca(); + +# endif +# endif +# endif +# endif +#endif + +#include <sys/types.h> +#include <stdio.h> + +#else /* HAVE_CONFIG_H */ + +#ifdef DJGPP +#include <sys/config.h> /* pseudo-autoconf values for DJGPP libc/headers */ +#define HAVE_TRUNCATE 1 +#define HAVE_ALLOCA 1 +#include <limits.h> +#endif /* DJGPP */ + +#include <stdio.h> + +#define DONT_TRACK_INTERNAL_LINKS 1 + +/* Explicit system-configure */ +#ifdef VMS +#define NO_SIZECHANGE + +#if defined(VAXC) && !defined(__DECC) +#define NO_UNISTD_H /* DEC C has unistd.h, but not VAX C */ +#endif + +#define NO_KEYPAD +#define NO_UTMP + +#undef NO_FILIO_H +#define NO_FILIO_H + +#define NOUSERS 
+#define DISP_PARTIAL /* experimental */ +#endif + +#if defined(VMS) || defined(_WINDOWS) +#define HAVE_STDLIB_H 1 +#endif + +/* Accommodate non-autoconf'd Makefile's (VMS, DJGPP, etc) */ + +#ifndef NO_ARPA_INET_H +#define HAVE_ARPA_INET_H 1 +#endif + +#ifndef NO_CBREAK +#define HAVE_CBREAK 1 +#endif + +#ifndef NO_CUSERID +#define HAVE_CUSERID 1 +#endif + +#ifndef NO_FILIO_H +#define HAVE_SYS_FILIO_H 1 +#endif + +#ifndef NO_GETCWD +#define HAVE_GETCWD 1 +#endif + +#ifndef USE_SLANG +#ifndef NO_KEYPAD +#define HAVE_KEYPAD 1 +#endif +#ifndef NO_TTYTYPE +#define HAVE_TTYTYPE 1 +#endif +#endif /* USE_SLANG */ + +#ifndef NO_PUTENV +#define HAVE_PUTENV 1 +#endif + +#ifndef NO_SIZECHANGE +#define HAVE_SIZECHANGE 1 +#endif + +#ifndef NO_UNISTD_H +#undef HAVE_UNISTD_H +#define HAVE_UNISTD_H 1 +#endif + +#ifndef NO_UTMP +#define HAVE_UTMP 1 +#endif + +#endif /* HAVE_CONFIG_H */ + +#include <assert.h> + +/* suppress inadvertant use of gettext in makeuctb when cross-compiling */ +#ifdef DONT_USE_GETTEXT +#undef HAVE_GETTEXT +#undef HAVE_LIBGETTEXT_H +#undef HAVE_LIBINTL_H +#endif + +#ifndef lynx_srand +#define lynx_srand srand +#endif + +#ifndef lynx_rand +#define lynx_rand rand +#endif + +#if '0' != 48 +#define NOT_ASCII +#endif + +#if '0' == 240 +#define EBCDIC +#endif + +#ifndef LY_MAXPATH +#define LY_MAXPATH 256 +#endif + +#ifndef GCC_NORETURN +#define GCC_NORETURN /* nothing */ +#endif + +#ifndef GCC_UNUSED +#define GCC_UNUSED /* nothing */ +#endif + +#if defined(__GNUC__) && defined(_FORTIFY_SOURCE) +#define USE_IGNORE_RC +extern int ignore_unused; + +#define IGNORE_RC(func) ignore_unused = (int) func +#else +#define IGNORE_RC(func) (void) func +#endif /* gcc workarounds */ + +#if defined(__CYGWIN32__) && ! 
defined(__CYGWIN__) +#define __CYGWIN__ 1 +#endif + +#if defined(__CYGWIN__) /* 1998/12/31 (Thu) 16:13:46 */ +#include <windows.h> /* #include "windef.h" */ +#define BOOLEAN_DEFINED +#undef HAVE_POPEN /* FIXME: does this not work, or is it missing */ +#undef small /* see <w32api/rpcndr.h> */ +#endif + +/* cygwin, mingw32, etc. */ +#ifdef FILE_DOES_NOT_EXIST +#undef FILE_DOES_NOT_EXIST /* see <w32api/winnt.h> */ +#endif + +/* + * VS .NET 2003 includes winsock.h unconditionally from windows.h, + * so we do not want to include windows.h if we want winsock2.h + */ +#if defined(_WINDOWS) && !defined(__CYGWIN__) + +#ifndef __GNUC__ +#pragma warning (disable : 4100) /* unreferenced formal parameter */ +#pragma warning (disable : 4127) /* conditional expression is constant */ +#pragma warning (disable : 4201) /* nameless struct/union */ +#pragma warning (disable : 4214) /* bit field types other than int */ +#pragma warning (disable : 4310) /* cast truncates constant value */ +#pragma warning (disable : 4514) /* unreferenced inline function has been removed */ +#pragma warning (disable : 4996) /* This function or variable may be unsafe. ... 
*/ +#endif + +#if defined(USE_WINSOCK2_H) && (_MSC_VER >= 1300) && (_MSC_VER < 1400) +#include <winsock2.h> /* includes windows.h, in turn windef.h */ +#else +#include <windows.h> /* #include "windef.h" */ +#endif + +#define BOOLEAN_DEFINED + +#if !_WIN_CC /* 1999/09/29 (Wed) 22:00:53 */ +#include <dos.h> +#endif + +#undef sleep /* 1998/06/23 (Tue) 16:54:53 */ +extern void sleep(unsigned __seconds); + +#define popen _popen +#define pclose _pclose + +#if defined(_MSC_VER) +typedef unsigned short mode_t; +#endif + +#endif /* _WINDOWS */ + +#if defined(USE_DEFAULT_COLORS) && !defined(HAVE_USE_DEFAULT_COLORS) + /* if we don't have use_default_colors() */ +# undef USE_DEFAULT_COLORS +#endif + +#ifndef USE_COLOR_STYLE + /* it's useless for such setup */ +# define NO_EMPTY_HREFLESS_A +#endif + +#if defined(__EMX__) || defined(WIN_EX) || defined(HAVE_POPEN) +# define CAN_CUT_AND_PASTE +#endif + +#if defined(USE_SLANG) || (defined(USE_COLOR_STYLE) && defined(__EMX__)) +# define USE_BLINK +#endif + +#if defined(DOSPATH) || defined(__EMX__) +# define USE_DOS_DRIVES /* we allow things like "c:" in paths */ +#endif + +#if defined(UNIX) +# if (defined(__BEOS__) || defined(__CYGWIN__) || defined(__DJGPP__) || defined(__EMX__) || defined(__MINGW32__)) +# define SINGLE_USER_UNIX /* well, at least they try */ +# else +# define MULTI_USER_UNIX +# endif +#endif + +/* + + ERROR TYPE + + This is passed back when streams are aborted. It might be nice to have some structure + of error messages, numbers, and recursive pointers to reasons. Curently this is a + placeholder for something more sophisticated. + + */ +typedef void *HTError; /* Unused at present -- best definition? 
*/ + +/* + +Standard C library for malloc() etc + + */ +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#ifndef EXIT_SUCCESS +#define EXIT_SUCCESS 0 +#endif + +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +#ifdef __EMX__ +#include <unistd.h> /* should be re-include protected under EMX */ +#define getcwd _getcwd2 +#define chdir _chdir2 +#endif + +#ifdef vax +#ifdef unix +#define ultrix /* Assume vax+unix=ultrix */ +#endif /* unix */ +#endif /* vax */ + +#ifndef VMS +#ifndef ultrix + +#ifdef NeXT +#include <libc.h> /* NeXT */ +#endif /* NeXT */ + +#else /* ultrix: */ + +#include <malloc.h> +#include <memory.h> + +#endif /* !ultrix */ +#else /* VMS: */ + +#include <unixlib.h> +#if defined(VAXC) && !defined(__DECC) +#define malloc VAXC$MALLOC_OPT +#define calloc VAXC$CALLOC_OPT +#define free VAXC$FREE_OPT +#define cfree VAXC$CFREE_OPT +#define realloc VAXC$REALLOC_OPT +#endif /* VAXC && !__DECC */ + +#endif /* !VMS */ + +#ifndef NULL +#define NULL ((void *)0) +#endif + +#define isEmpty(s) ((s) == 0 || *(s) == 0) +#define non_empty(s) !isEmpty(s) + +#define NonNull(s) (((s) != 0) ? s : "") +#define NONNULL(s) (((s) != 0) ? s : "(null)") + +/* array/table size */ +#define TABLESIZE(v) (sizeof(v)/sizeof(v[0])) + +#define typecalloc(cast) (cast *)calloc((size_t)1, sizeof(cast)) +#define typecallocn(cast,ntypes) (cast *)calloc((size_t)(ntypes),sizeof(cast)) + +#define typeRealloc(cast,ptr,ntypes) (cast *)realloc(ptr, (size_t)(ntypes)*sizeof(cast)) + +#define typeMalloc(cast) (cast *)malloc(sizeof(cast)) +#define typeMallocn(cast,ntypes) (cast *)malloc((size_t)(ntypes)*sizeof(cast)) + +/* + +OFTEN USED INTEGER MACROS + + Min and Max functions + + */ +#ifndef HTMIN +#define HTMIN(a,b) ((a) <= (b) ? (a) : (b)) +#define HTMAX(a,b) ((a) >= (b) ? 
(a) : (b)) +#endif +/* + +Booleans + + */ +/* Note: GOOD and BAD are already defined (differently) on RS6000 aix */ +/* #define GOOD(status) ((status)38;1) VMS style status: test bit 0 */ +/* #define BAD(status) (!GOOD(status)) Bit 0 set if OK, otherwise clear */ + +#ifndef _WINDOWS +#ifndef BOOLEAN_DEFINED +typedef char BOOLEAN; /* Logical value */ + +#ifndef CURSES +#ifndef TRUE +#define TRUE (BOOLEAN)1 +#define FALSE (BOOLEAN)0 +#endif +#endif /* CURSES */ +#endif /* BOOLEAN_DEFINED */ +#define BOOLEAN_DEFINED +#endif /* _WINDOWS */ + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) +/* it declares BOOL/BOOLEAN as BYTE/int */ +#else +#ifndef BOOL +#define BOOL BOOLEAN +#endif +#endif + +#ifndef YES +#define YES (BOOLEAN)1 +#define NO (BOOLEAN)0 +#endif + +#define STRING1PTR const char * +#define STRING2PTR const char * const * + +extern BOOL LYOutOfMemory; /* Declared in LYexit.c - FM */ + +#define TCP_PORT 80 /* Allocated to http by Jon Postel/ISI 24-Jan-92 */ +#define OLD_TCP_PORT 2784 /* Try the old one if no answer on 80 */ +#define DNP_OBJ 80 /* This one doesn't look busy, but we must check */ + /* That one was for decnet */ + +/* Inline Function WHITE: Is character c white space? */ +/* For speed, include all control characters */ + +#define WHITE(c) ((UCH(TOASCII(c))) <= 32) + +/* Inline Function LYIsASCII: Is character c a traditional ASCII + * character (i.e. <128) after converting from host character set. */ + +#define LYIsASCII(c) (TOASCII(UCH(c)) < 128) + +/* + +Success (>=0) and failure (<0) codes + +Some of the values are chosen to be HTTP-like, but status return values +are generally not the response status from any specific protocol. 
+ + */ + +#define HT_PARSER_OTHER_CONTENT 701 /* tells SGML to change content model */ +#define HT_PARSER_REOPEN_ELT 700 /* tells SGML parser to keep tag open */ +#define HT_REDIRECTING 399 +#define HT_PARTIAL_CONTENT 206 /* Partial Content */ +#define HT_LOADED 200 /* Instead of a socket */ + +#define HT_OK 0 /* Generic success */ + +#define HT_ERROR -1 /* Generic failure */ +#define HT_CANNOT_TRANSLATE -4 +#define HT_BAD_EOF -12 /* Premature EOF */ +#define HT_NO_CONNECTION -99 /* ERR no connection available - */ +#define HT_NO_DATA -204 /* OK but no data was loaded - */ + /* possibly other app started or forked */ +#define HT_NO_ACCESS -401 /* Access not available */ +#define HT_FORBIDDEN -403 /* Access forbidden */ +#define HT_NOT_ACCEPTABLE -406 /* Not Acceptable */ +#define HT_H_ERRNO_VALID -800 /* see h_errno for resolver error */ +#define HT_INTERNAL -900 /* Weird -- should never happen. */ +#define HT_INTERRUPTED -29998 +#define HT_NOT_LOADED -29999 + +#ifndef va_arg +# include <stdarg.h> +#endif + +#define LYva_start(ap,format) va_start(ap,format) + +/* + * GCC can be told that some functions are like printf (and do type-checking on + * their parameters). + */ +#ifndef GCC_PRINTFLIKE +#if defined(GCC_PRINTF) && !defined(printf) && !defined(HAVE_LIBUTF8_H) +#define GCC_PRINTFLIKE(fmt,var) __attribute__((format(printf,fmt,var))) +#else +#define GCC_PRINTFLIKE(fmt,var) /*nothing */ +#endif +#endif + +#include <HTString.h> /* String utilities */ + +/* + +Out Of Memory checking for malloc() return: + + */ +#ifndef __FILE__ +#define __FILE__ "" +#define __LINE__ "" +#endif + +#include <LYexit.h> + +/* + * Upper- and Lowercase macros + * + * The problem here is that toupper(x) is not defined officially unless + * isupper(x) is. These macros are CERTAINLY needed on #if defined(pyr) || + * define(mips) or BDSI platforms. For safefy, we make them mandatory. + * + * Note: Pyramid and Mips can't uppercase non-alpha. 
+ */ +#include <ctype.h> +#include <string.h> + +#ifndef TOLOWER + +#ifdef USE_ASCII_CTYPES + +#define TOLOWER(c) ascii_tolower(UCH(c)) +#define TOUPPER(c) ascii_toupper(UCH(c)) +#define ISUPPER(c) ascii_isupper(UCH(c)) + +#else + +#define TOLOWER(c) (isupper(UCH(c)) ? tolower(UCH(c)) : UCH(c)) +#define TOUPPER(c) (islower(UCH(c)) ? toupper(UCH(c)) : UCH(c)) +#define ISUPPER(c) (isupper(UCH(c))) + +#endif + +#endif /* TOLOWER */ + +#define FREE(x) {if (x != 0) {free((char *)x); x = NULL;}} + +/* + +The local equivalents of CR and LF + + We can check for these after net ascii text has been converted to the local + representation. Similarly, we include them in strings to be sent as net ascii after + translation. + + */ +#define LF FROMASCII('\012') /* ASCII line feed LOCAL EQUIVALENT */ +#define CR FROMASCII('\015') /* Will be converted to ^M for transmission */ + +/* + * Debug message control. + */ +#ifdef NO_LYNX_TRACE +#define WWW_TraceFlag 0 +#define WWW_TraceMask 0 +#define LYTraceLogFP 0 +#else +extern BOOLEAN WWW_TraceFlag; +extern int WWW_TraceMask; +#endif + +#define TRACE (WWW_TraceFlag) +#define TRACE_bit(n) (TRACE && (WWW_TraceMask & (1 << n)) != 0) +#define TRACE_SGML (TRACE_bit(0)) +#define TRACE_STYLE (TRACE_bit(1)) +#define TRACE_TRST (TRACE_bit(2)) +#define TRACE_CFG (TRACE_bit(3)) +#define TRACE_BSTRING (TRACE_bit(4)) +#define TRACE_COOKIES (TRACE_bit(5)) +#define TRACE_CHARSETS (TRACE_bit(6)) +#define TRACE_GRIDTEXT (TRACE_bit(7)) +#define TRACE_TIMING (TRACE_bit(8)) + +/* + * Get printing/scanning formats. + */ +#if defined(HAVE_INTTYPES_H) +#include <inttypes.h> +#endif + +/* + * Printing/scanning-formats for "off_t", as well as cast needed to fit. 
+ */ +#if defined(HAVE_LONG_LONG) && defined(HAVE_INTTYPES_H) && defined(SIZEOF_OFF_T) +#if (SIZEOF_OFF_T == 8) && defined(PRId64) + +#define PRI_off_t PRId64 +#define SCN_off_t SCNd64 +#define CAST_off_t(n) (int64_t)(n) + +#elif (SIZEOF_OFF_T == 4) && defined(PRId32) + +#define PRI_off_t PRId32 +#define SCN_off_t SCNd32 + +#if (SIZEOF_INT == 4) +#define CAST_off_t(n) (int)(n) +#elif (SIZEOF_LONG == 4) +#define CAST_off_t(n) (long)(n) +#else +#define CAST_off_t(n) (int32_t)(n) +#endif + +#endif +#endif + +#ifndef PRI_off_t +#if defined(HAVE_LONG_LONG) && (SIZEOF_OFF_T > SIZEOF_LONG) +#define PRI_off_t "lld" +#define SCN_off_t "lld" +#define CAST_off_t(n) (long long)(n) +#else +#define PRI_off_t "ld" +#define SCN_off_t "ld" +#define CAST_off_t(n) (long)(n) +#endif +#endif + +/* + * Printing-format for "time_t", as well as cast needed to fit. + */ +#if defined(HAVE_LONG_LONG) && defined(HAVE_INTTYPES_H) && defined(SIZEOF_TIME_T) +#if (SIZEOF_TIME_T == 8) && defined(PRId64) + +#define PRI_time_t PRId64 +#define SCN_time_t SCNd64 +#define CAST_time_t(n) (int64_t)(n) + +#elif (SIZEOF_TIME_T == 4) && defined(PRId32) + +#define PRI_time_t PRId32 +#define SCN_time_t SCNd32 + +#if (SIZEOF_INT == 4) +#define CAST_time_t(n) (int)(n) +#elif (SIZEOF_LONG == 4) +#define CAST_time_t(n) (long)(n) +#else +#define CAST_time_t(n) (int32_t)(n) +#endif + +#endif +#endif + +#ifndef PRI_time_t +#if defined(HAVE_LONG_LONG) && (SIZEOF_TIME_T > SIZEOF_LONG) +#define PRI_time_t "lld" +#define SCN_time_t "lld" +#define CAST_time_t(n) (long long)(n) +#else +#define PRI_time_t "ld" +#define SCN_time_t "ld" +#define CAST_time_t(n) (long)(n) +#endif +#endif + +/* + * Printing-format for "UCode_t". + */ +#define PRI_UCode_t "lX" + +/* + * Verbose-tracing. 
+ */ +#if defined(USE_VERTRACE) && !defined(LY_TRACELINE) +#define LY_TRACELINE __LINE__ +#endif + +#if defined(LY_TRACELINE) +#define LY_SHOWWHERE fprintf( tfp, "%s: %d: ", __FILE__, LY_TRACELINE ), +#else +#define LY_SHOWWHERE /* nothing */ +#endif + +#define CTRACE(p) ((void)((TRACE) && ( LY_SHOWWHERE fprintf p ))) +#define CTRACE2(m,p) ((void)((m) && ( LY_SHOWWHERE fprintf p ))) +#define tfp TraceFP() +#define CTRACE_SLEEP(secs) if (TRACE && LYTraceLogFP == 0) sleep((unsigned)secs) +#define CTRACE_FLUSH(fp) if (TRACE) fflush(fp) + +#include <www_tcp.h> + +/* + * We force this include-ordering since socks.h contains redefinitions of + * functions that probably are prototyped via other includes. The socks.h + * definitions have to be included everywhere, since they're making wrappers + * for the stdio functions as well as the network functions. + */ +#if defined(USE_SOCKS5) && !defined(DONT_USE_SOCKS5) +#define SOCKS4TO5 /* turn on the Rxxxx definitions used in Lynx */ +#include <socks.h> + +/* + * The AIX- and SOCKS4-specific definitions in socks.h are inconsistent. + * Repair them so they're consistent (and usable). 
+ */ +#if defined(_AIX) && !defined(USE_SOCKS4_PREFIX) +#undef Raccept +#define Raccept accept +#undef Rgetsockname +#define Rgetsockname getsockname +#undef Rgetpeername +#define Rgetpeername getpeername +#endif + +/* + * Workaround for order-of-evaluation problem with gcc and socks5 headers + * which breaks the Rxxxx names by attaching the prefix twice: + */ +#ifdef INCLUDE_PROTOTYPES +#undef Raccept +#undef Rbind +#undef Rconnect +#undef Rlisten +#undef Rselect +#undef Rgetpeername +#undef Rgetsockname +#define Raccept accept +#define Rbind bind +#define Rconnect connect +#define Rgetpeername getpeername +#define Rgetsockname getsockname +#define Rlisten listen +#define Rselect select +#endif + +#endif /* USE_SOCKS5 */ + +#define SHORTENED_RBIND /* FIXME: do this in configure-script */ + +#ifdef USE_SSL + +#define free_func free__func + +#ifdef USE_OPENSSL_INCL +#include <openssl/ssl.h> +#include <openssl/crypto.h> +#include <openssl/rand.h> +#include <openssl/err.h> + +#else + +#if defined(USE_GNUTLS_FUNCS) +#include <tidy_tls.h> +#define USE_GNUTLS_INCL 1 /* do this for the ".c" ifdef's */ +#elif defined(USE_GNUTLS_INCL) +#include <gnutls/openssl.h> +/* + * GNUTLS's implementation of OpenSSL is very incomplete and rudimentary. + * For a start, let's make it compile (TD - 2003/4/13). 
+ */ +#ifndef SSL_VERIFY_PEER +#define SSL_VERIFY_PEER 0x01 +#endif +#else + +#ifdef USE_NSS_COMPAT_INCL +#include <nss_compat_ossl/nss_compat_ossl.h> + +#else /* assume SSLeay */ +#include <ssl.h> +#include <crypto.h> +#include <rand.h> +#include <err.h> +#endif +#endif +#endif /* USE_OPENSSL_INCL */ + +#undef free_func +#endif /* USE_SSL */ + +#ifdef HAVE_BSD_STDLIB_H +#include <bsd/stdlib.h> /* prototype for arc4random.h */ +#elif defined(HAVE_BSD_RANDOM_H) +#include <bsd/random.h> /* prototype for arc4random.h */ +#endif + +#ifdef HAVE_LIBDMALLOC +#include <dmalloc.h> /* Gray Watson's library */ +#define show_alloc() dmalloc_log_unfreed() +#endif + +#ifdef HAVE_LIBDBMALLOC +#include <dbmalloc.h> /* Conor Cahill's library */ +#define show_alloc() malloc_dump(fileno(stderr)) +#endif + +#ifndef show_alloc +#define show_alloc() /* nothing */ +#endif + +#include <userdefs.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifndef TOLOWER +#ifdef USE_ASCII_CTYPES + extern int ascii_toupper(int); + extern int ascii_tolower(int); + extern int ascii_isupper(int); +#endif +#endif + + extern FILE *TraceFP(void); + +#ifdef USE_SSL + extern SSL *HTGetSSLHandle(void); + extern void HTSSLInitPRNG(void); + extern int HTGetSSLCharacter(void *handle); +#endif /* USE_SSL */ + +#ifdef __cplusplus +} +#endif +#endif /* HTUTILS_H */ diff --git a/WWW/Library/Implementation/HTVMSUtils.c b/WWW/Library/Implementation/HTVMSUtils.c new file mode 100644 index 00000000..85ca863a --- /dev/null +++ b/WWW/Library/Implementation/HTVMSUtils.c @@ -0,0 +1,1133 @@ +/* + * $LynxId: HTVMSUtils.c,v 1.37 2010/09/25 11:39:49 tom Exp $ + * + * MODULE HTVMSUtil.c + * VMS Utility Routines + * + * AUTHORS: + * MD Mark Donszelmann duns@vxdeop.cern.ch + * + * HISTORY: + * 14 Nov 93 MD Written + * + * BUGS: + * + * + */ + +#include <HTUtils.h> +#ifdef VMS +#include <HTFormat.h> +#include <HTStream.h> +#include <UCDefs.h> +#include <UCMap.h> +#include <UCAux.h> +#include <HTFTP.h> +#include <HTTCP.h> +#include 
<HTVMSUtils.h> +#include <ssdef.h> +#include <jpidef.h> +#include <prvdef.h> +#include <acldef.h> +#include <chpdef.h> +#include <descrip.h> +#include <lib$routines.h> +#include <starlet.h> +#include <rmsdef.h> + +#include <LYGlobalDefs.h> +#include <LYUtils.h> +#include <LYLeaks.h> +#include <LYStrings.h> + +BOOL HTVMSFileVersions = FALSE; /* Include version numbers in listing? */ + +typedef struct { + unsigned long BufferLength:16; + unsigned long ItemCode:16; + unsigned long BufferAddress:32; + unsigned long ReturnLengthAddress:32; +} ItemStruct; + +/* PUBLIC HTVMS_authSysPrv() + * CHECKS IF THIS PROCESS IS AUTHORIZED TO ENABLE SYSPRV + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * returns YES if SYSPRV is authorized + */ +BOOL HTVMS_authSysPrv(void) +{ + unsigned long Result; + ItemStruct ItemList[2]; + unsigned long Length; + unsigned long Buffer[2]; + + /* fill Item */ + ItemList[0].BufferLength = sizeof(Buffer); + ItemList[0].BufferAddress = (unsigned long) Buffer; + ItemList[0].ReturnLengthAddress = (unsigned long) &Length; + ItemList[0].ItemCode = JPI$_AUTHPRIV; + + /* terminate list */ + ItemList[1].ItemCode = 0; + ItemList[1].BufferLength = 0; + + /* call system */ + Result = sys$getjpiw(0, 0, 0, ItemList, 0, 0, 0); + + if (Result != SS$_NORMAL) + return (NO); + + if (Buffer[0] & PRV$M_SYSPRV) + return (YES); + + return (NO); +} + +/* PUBLIC HTVMS_enableSysPrv() + * ENABLES SYSPRV + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * + */ +void HTVMS_enableSysPrv(void) +{ + unsigned long Result; + unsigned long Prv[2], PreviousPrv[2]; + + Prv[0] = PRV$M_SYSPRV; + Prv[1] = 0; + Result = sys$setprv(1, &Prv, 0, &PreviousPrv); + + if (Result == SS$_NORMAL) { + if (!(PreviousPrv[0] & PRV$M_SYSPRV)) { + CTRACE((tfp, "HTVMS_enableSysPrv: Enabled SYSPRV\n")); + } + } +} + +/* PUBLIC HTVMS_disableSysPrv() + * DISABLES SYSPRV + * ON ENTRY: + * No arguments. 
+ * + * ON EXIT: + * + */ +void HTVMS_disableSysPrv(void) +{ + unsigned long Result; + unsigned long Prv[2], PreviousPrv[2]; + + Prv[0] = PRV$M_SYSPRV; + Prv[1] = 0; + Result = sys$setprv(0, &Prv, 0, &PreviousPrv); + + if (Result == SS$_NORMAL) { + if (PreviousPrv[0] & PRV$M_SYSPRV) { + CTRACE((tfp, "HTVMS_disableSysPrv: Disabled SYSPRV\n")); + } + } +} + +/* PUBLIC HTVMS_checkAccess() + * CHECKS ACCESS TO FILE FOR CERTAIN USER + * ON ENTRY: + * FileName The file to be accessed + * UserName Name of the user to check access for. + * User nobody, represented by "" is given NO for an answer + * Method Name of the method to be chceked + * + * ON EXIT: + * returns YES if access is allowed + * + */ +BOOL HTVMS_checkAccess(const char *FileName, + const char *UserName, + const char *Method) +{ + unsigned long Result; + ItemStruct ItemList[2]; + unsigned long Length; + unsigned long Buffer; + unsigned long ObjType; + + char *VmsName; + + struct dsc$descriptor_s FileNameDesc; + struct dsc$descriptor_s UserNameDesc; + + char *colon; + + /* user nobody should access as from account under which server is running */ + if (0 == strcmp(UserName, "")) + return (NO); + + /* check Filename and convert */ + colon = strchr(FileName, ':'); + if (colon) + VmsName = HTVMS_name("", colon + 1); + else + VmsName = HTVMS_name("", FileName); + + /* check for GET */ + if (0 == strcmp(Method, "GET")) { + /* fill Item */ + ItemList[0].BufferLength = sizeof(Buffer); + ItemList[0].BufferAddress = (unsigned long) &Buffer; + ItemList[0].ReturnLengthAddress = (unsigned long) &Length; + ItemList[0].ItemCode = CHP$_FLAGS; + + /* terminate list */ + ItemList[1].ItemCode = 0; + ItemList[1].BufferLength = 0; + + /* fill input */ + ObjType = ACL$C_FILE; + Buffer = CHP$M_READ; + UserNameDesc.dsc$w_length = strlen(UserName); + UserNameDesc.dsc$b_dtype = DSC$K_DTYPE_T; + UserNameDesc.dsc$b_class = DSC$K_CLASS_S; + UserNameDesc.dsc$a_pointer = (char *) UserName; + FileNameDesc.dsc$w_length = strlen(VmsName); + 
FileNameDesc.dsc$b_dtype = DSC$K_DTYPE_T; + FileNameDesc.dsc$b_class = DSC$K_CLASS_S; + FileNameDesc.dsc$a_pointer = VmsName; + + /* call system */ + Result = sys$check_access(&ObjType, &FileNameDesc, &UserNameDesc, ItemList); + + if (Result == SS$_NORMAL) + return (YES); + else + return (NO); + } + + return (NO); +} + +/* PUBLIC HTVMS_wwwName() + * CONVERTS VMS Name into WWW Name + * ON ENTRY: + * vmsname VMS file specification (NO NODE) + * + * ON EXIT: + * returns www file specification + * + * EXAMPLES: + * vmsname wwwname + * DISK$USER disk$user + * DISK$USER: /disk$user/ + * DISK$USER:[DUNS] /disk$user/duns + * DISK$USER:[DUNS.ECHO] /disk$user/duns/echo + * [DUNS] duns + * [DUNS.ECHO] duns/echo + * [DUNS.ECHO.-.TRANS] duns/echo/../trans + * [DUNS.ECHO.--.TRANS] duns/echo/../../trans + * [.DUNS] duns + * [.DUNS.ECHO] duns/echo + * [.DUNS.ECHO]TEST.COM duns/echo/test.com + * TEST.COM test.com + * + * + */ +const char *HTVMS_wwwName(const char *vmsname) +{ + static char wwwname[LY_MAXPATH]; + const char *src; + char *dst; + int dir; + + dst = wwwname; + src = vmsname; + dir = 0; + if (strchr(src, ':')) + *(dst++) = '/'; + for (; *src != '\0'; src++) { + switch (*src) { + case ':': + *(dst++) = '/'; + break; + case '-': + if (dir) { + if ((*(src - 1) == '[' || + *(src - 1) == '.' || + *(src - 1) == '-') && + (*(src + 1) == '.' || + *(src + 1) == '-')) { + *(dst++) = '/'; + *(dst++) = '.'; + *(dst++) = '.'; + } else + *(dst++) = '-'; + } else { + if (*(src - 1) == ']') + *(dst++) = '/'; + *(dst++) = '-'; + } + break; + case '.': + if (dir) { + if (*(src - 1) != '[') + *(dst++) = '/'; + } else { + if (*(src - 1) == ']') + *(dst++) = '/'; + *(dst++) = '.'; + } + break; + case '[': + dir = 1; + break; + case ']': + dir = 0; + break; + default: + if (*(src - 1) == ']') + *(dst++) = '/'; + *(dst++) = *src; + break; + } + } + *(dst++) = '\0'; + return (wwwname); +} + +/* + * The code below is for directory browsing by VMS Curses clients. 
+ * It is based on the newer WWWLib's HTDirBrw.c. - Foteos Macrides + */ +int HTStat(const char *filename, + struct stat *info) +{ + /* + the following stuff does not work in VMS with a normal stat... + --> /disk$user/duns/www if www is a directory + is statted like: /disk$user/duns/www.dir + after a normal stat has failed + --> /disk$user/duns if duns is a toplevel directory + is statted like: /disk$user/000000/duns.dir + --> /disk$user since disk$user is a device + is statted like: /disk$user/000000/000000.dir + --> / + searches all devices, no solution yet... + --> /vxcern!/disk$cr/wwwteam/login.com + is not statted but granted with fake information... + */ + int Result; + int Len; + char *Ptr, *Ptr2; + static char *Name; + + /* try normal stat... */ + Result = stat((char *) filename, info); + if (Result == 0) + return (Result); + + /* make local copy */ + StrAllocCopy(Name, filename); + + /* failed,so do device search in case root is requested */ + if (!strcmp(Name, "/")) { /* root requested */ + return (-1); + } + + /* failed so this might be a directory, add '.dir' */ + Len = strlen(Name); + if (Name[Len - 1] == '/') + Name[Len - 1] = '\0'; + + /* fail in case of device */ + Ptr = strchr(Name + 1, '/'); + if ((Ptr == NULL) && (Name[0] == '/')) { /* device only... 
*/ + StrAllocCat(Name, "/000000/000000"); + } + + if (Ptr != NULL) { /* correct filename in case of toplevel dir */ + Ptr2 = strchr(Ptr + 1, '/'); + if ((Ptr2 == NULL) && (Name[0] == '/')) { + char End[256]; + + LYStrNCpy(End, Ptr, sizeof(End) - 1); + *(Ptr + 1) = '\0'; + StrAllocCat(Name, "000000"); + StrAllocCat(Name, End); + } + } + + /* try in case a file on toplevel directory or .DIR was already specified */ + Result = stat(Name, info); + if (Result == 0) + return (Result); + + /* add .DIR and try again */ + StrAllocCat(Name, ".dir"); + Result = stat(Name, info); + return (Result); +} + +#ifndef _POSIX_SOURCE +#define d_ino d_fileno /* compatability */ +#ifndef NULL +#define NULL 0 +#endif +#endif /* !_POSIX_SOURCE */ + +typedef struct __dirdesc { + long context; /* context descriptor for LIB$FIND_FILE calls */ + char dirname[255 + 1]; /* keeps the directory name, including *.* */ + struct dsc$descriptor_s dirname_desc; /* descriptor of dirname */ +} DIR; + +static DIR *HTVMSopendir(char *dirname); +static struct dirent *HTVMSreaddir(DIR *dirp); +static int HTVMSclosedir(DIR *dirp); + +/*** #include <sys_dirent.h> ***/ +/*** "sys_dirent.h" ***/ +struct dirent { + unsigned long d_fileno; /* file number of entry */ + unsigned short d_namlen; /* length of string in d_name */ + char d_name[255 + 1]; /* name (up to MAXNAMLEN + 1) */ +}; + +#ifndef _POSIX_SOURCE +/* + * It's unlikely to change, but make sure that sizeof d_name above is + * at least MAXNAMLEN + 1 (more may be added for padding). + */ +#define MAXNAMLEN 255 +/* + * The macro DIRSIZ(dp) gives the minimum amount of space required to represent + * a directory entry. For any directory entry dp->d_reclen >= DIRSIZ(dp). + * Specific filesystem types may use this macro to construct the value + * for d_reclen. 
+ */ +#undef DIRSIZ +#define DIRSIZ(dp) \ + (((sizeof(struct dirent) - (MAXNAMLEN+1) + ((dp)->d_namlen+1)) +3) & ~3) + +#endif /* !_POSIX_SOURCE */ + +static DIR *HTVMSopendir(char *dirname) +{ + static DIR dir; + char *closebracket; + long status; + struct dsc$descriptor_s entryname_desc; + struct dsc$descriptor_s dirname_desc; + static char *DirEntry; + char Actual[256]; + char VMSentry[256]; + char UnixEntry[256]; + int index; + char *dot; + + /* check if directory exists */ + /* dirname can look like /disk$user/duns/www/test/multi */ + /* or like /disk$user/duns/www/test/multi/ */ + /* DirEntry should look like disk$user:[duns.www.test]multi in both cases */ + /* dir.dirname should look like disk$user:[duns.www.test.multi] */ + sprintf(UnixEntry, "%.*s", sizeof(UnixEntry) - 2, dirname); + if (UnixEntry[strlen(UnixEntry) - 1] != '/') + strcat(UnixEntry, "/"); + + StrAllocCopy(DirEntry, HTVMS_name("", UnixEntry)); + if (strlen(DirEntry) > sizeof(dir.dirname) - 1) + return (NULL); + strcpy(dir.dirname, DirEntry); + index = strlen(DirEntry) - 1; + + if (DirEntry[index] == ']') + DirEntry[index] = '\0'; + + if ((dot = strrchr(DirEntry, '.')) == NULL) { /* convert disk$user:[duns] into disk$user:[000000]duns.dir */ + char *openbr = strrchr(DirEntry, '['); + + if (!openbr) { /* convert disk$user: into disk$user:[000000]000000.dir */ + if (strlen(dir.dirname) > sizeof(dir.dirname) - 10) + return (NULL); + sprintf(dir.dirname, "%.*s[000000]", sizeof(dir.dirname) - 9, DirEntry); + StrAllocCat(DirEntry, "[000000]000000.dir"); + } else { + char End[256]; + + strcpy(End, openbr + 1); + *(openbr + 1) = '\0'; + StrAllocCat(DirEntry, "000000]"); + StrAllocCat(DirEntry, End); + StrAllocCat(DirEntry, ".dir"); + } + } else { + *dot = ']'; + StrAllocCat(DirEntry, ".dir"); + } + /* lib$find_file needs a fixed-size buffer */ + LYStrNCpy(Actual, DirEntry, sizeof(Actual) - 1); + + dir.context = 0; + dirname_desc.dsc$w_length = strlen(Actual); + dirname_desc.dsc$b_dtype = 
DSC$K_DTYPE_T; + dirname_desc.dsc$b_class = DSC$K_CLASS_S; + dirname_desc.dsc$a_pointer = (char *) &(Actual); + + /* look for the directory */ + entryname_desc.dsc$w_length = 255; + entryname_desc.dsc$b_dtype = DSC$K_DTYPE_T; + entryname_desc.dsc$b_class = DSC$K_CLASS_S; + entryname_desc.dsc$a_pointer = VMSentry; + + status = lib$find_file(&(dirname_desc), + &entryname_desc, + &(dir.context), + 0, 0, 0, 0); + if (!(status & 0x01)) { /* directory not found */ + return (NULL); + } + + if (strlen(dir.dirname) > sizeof(dir.dirname) - 10) + return (NULL); + if (HTVMSFileVersions) + strcat(dir.dirname, "*.*;*"); + else + strcat(dir.dirname, "*.*"); + dir.context = 0; + dir.dirname_desc.dsc$w_length = strlen(dir.dirname); + dir.dirname_desc.dsc$b_dtype = DSC$K_DTYPE_T; + dir.dirname_desc.dsc$b_class = DSC$K_CLASS_S; + dir.dirname_desc.dsc$a_pointer = (char *) &(dir.dirname); + return (&dir); +} + +static struct dirent *HTVMSreaddir(DIR *dirp) +{ + static struct dirent entry; + long status; + struct dsc$descriptor_s entryname_desc; + char *space, *slash; + char VMSentry[256]; + const char *UnixEntry; + + entryname_desc.dsc$w_length = 255; + entryname_desc.dsc$b_dtype = DSC$K_DTYPE_T; + entryname_desc.dsc$b_class = DSC$K_CLASS_S; + entryname_desc.dsc$a_pointer = VMSentry; + + status = lib$find_file(&(dirp->dirname_desc), + &entryname_desc, + &(dirp->context), + 0, 0, 0, 0); + if (status == RMS$_NMF) { /* no more files */ + return (NULL); + } else { /* ok */ + if (!(status & 0x01)) + return (0); + if (HTVMSFileVersions) + space = strchr(VMSentry, ' '); + else + space = strchr(VMSentry, ';'); + if (space) + *space = '\0'; + + /* convert to unix style... 
*/ + UnixEntry = HTVMS_wwwName(VMSentry); + slash = strrchr(UnixEntry, '/') + 1; + strcpy(entry.d_name, slash); + entry.d_namlen = strlen(entry.d_name); + entry.d_fileno = 1; + return (&entry); + } +} + +static int HTVMSclosedir(DIR *dirp) +{ + long status; + + status = lib$find_file_end(&(dirp->context)); + if (!(status & 0x01)) + exit_immediately(status); + dirp->context = 0; + return (0); +} + +#include <HTAnchor.h> +#include <HTParse.h> +#include <HTBTree.h> +#include <HTFile.h> /* For HTFileFormat() */ +#include <HTAlert.h> +/* + * Hypertext object building machinery. + */ +#include <HTML.h> +#define PUTC(c) (*targetClass.put_character)(target, c) +#define PUTS(s) (*targetClass.put_string)(target, s) +#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0) +#define END(e) (*targetClass.end_element)(target, e, 0) +#define FREE_TARGET (*targetClass._free)(target) +#define ABORT_TARGET (*targetClass._free)(target) +struct _HTStructured { + const HTStructuredClass *isa; + /* ... */ +}; + +#define STRUCT_DIRENT struct dirent + +static char *months[12] = +{ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +typedef struct _VMSEntryInfo { + char *filename; + char *type; + char *date; + unsigned int size; + BOOLEAN display; /* show this entry? 
*/ +} VMSEntryInfo; + +static void free_VMSEntryInfo_contents(VMSEntryInfo * entry_info) +{ + if (entry_info) { + FREE(entry_info->filename); + FREE(entry_info->type); + FREE(entry_info->date); + } + /* dont free the struct */ +} + +int compare_VMSEntryInfo_structs(VMSEntryInfo * entry1, VMSEntryInfo * entry2) +{ + int i, status; + char date1[16], date2[16], time1[8], time2[8], month[4]; + + switch (HTfileSortMethod) { + case FILE_BY_SIZE: + /* both equal or both 0 */ + if (entry1->size == entry2->size) + return (strcasecomp(entry1->filename, + entry2->filename)); + else if (entry1->size > entry2->size) + return (1); + else + return (-1); + case FILE_BY_TYPE: + if (entry1->type && entry2->type) { + status = strcasecomp(entry1->type, entry2->type); + if (status) + return (status); + /* else fall to filename comparison */ + } + return (strcasecomp(entry1->filename, + entry2->filename)); + case FILE_BY_DATE: + if (entry1->date && entry2->date) { + /* + * Make sure we have the correct length. - FM + */ + if (strlen(entry1->date) != 12 || + strlen(entry2->date) != 12) { + return (strcasecomp(entry1->filename, + entry2->filename)); + } + /* + * Set up for sorting in reverse + * chronological order. 
- FM + */ + if (entry1->date[7] != ' ') { + strcpy(date1, "9999"); + strcpy(time1, (char *) &entry1->date[7]); + } else { + strcpy(date1, (char *) &entry1->date[8]); + strcpy(time1, "00:00"); + } + StrNCpy(month, entry1->date, 3); + month[3] = '\0'; + for (i = 0; i < 12; i++) { + if (!strcasecomp(month, months[i])) { + break; + } + } + i++; + sprintf(month, "%02d", i); + strcat(date1, month); + StrNCat(date1, (char *) &entry1->date[4], 2); + date1[8] = '\0'; + if (date1[6] == ' ') { + date1[6] = '0'; + } + strcat(date1, time1); + if (entry2->date[7] != ' ') { + strcpy(date2, "9999"); + strcpy(time2, (char *) &entry2->date[7]); + } else { + strcpy(date2, (char *) &entry2->date[8]); + strcpy(time2, "00:00"); + } + StrNCpy(month, entry2->date, 3); + month[3] = '\0'; + for (i = 0; i < 12; i++) { + if (!strcasecomp(month, months[i])) { + break; + } + } + i++; + sprintf(month, "%02d", i); + strcat(date2, month); + StrNCat(date2, (char *) &entry2->date[4], 2); + date2[8] = '\0'; + if (date2[6] == ' ') { + date2[6] = '0'; + } + strcat(date2, time2); + /* + * Do the comparison. - FM + */ + status = strcasecomp(date2, date1); + if (status) + return (status); + /* else fall to filename comparison */ + } + return (strcasecomp(entry1->filename, + entry2->filename)); + case FILE_BY_NAME: + default: + return (strcmp(entry1->filename, + entry2->filename)); + } +} + +/* HTVMSBrowseDir() + * + * This function generates a directory listing as an HTML-object + * for local file URL's. It assumes the first two elements of + * of the path are a device followed by a directory: + * + * file://localhost/device/directory[/[foo]] + * + * Will not accept 000000 as a directory name. + * Will offer links to parent through the top directory, unless + * a terminal slash was included in the calling URL. + * + * Returns HT_LOADED on success, HTLoadError() messages on error. + * + * Developed for Lynx by Foteos Macrides (macrides@sci.wfeb.edu). 
+ */ +int HTVMSBrowseDir(const char *address, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink) +{ + HTStructured *target; + HTStructuredClass targetClass; + char *pathname = HTParse(address, "", PARSE_PATH + PARSE_PUNCTUATION); + char *tail = NULL; + char *title = NULL; + char *header = NULL; + char *parent = NULL; + char *relative = NULL; + char *cp, *cp1; + int pathend, len; + DIR *dp; + struct stat file_info; + time_t NowTime; + static char ThisYear[8]; + VMSEntryInfo *entry_info = 0; + char string_buffer[64]; + + HTUnEscape(pathname); + CTRACE((tfp, "HTVMSBrowseDir: Browsing `%s\'\n", pathname)); + + /* + * Require at least two elements (presumably a device and directory) and + * disallow the device root (000000 directory). Symbolic paths (e.g., + * sys$help) should have been translated and expanded (e.g., to + * /sys$sysroot/syshlp) before calling this routine. + */ + if (((*pathname != '/') || + (cp = strchr(pathname + 1, '/')) == NULL || + *(cp + 1) == '\0' || + 0 == StrNCmp((cp + 1), "000000", 6)) || + (dp = HTVMSopendir(pathname)) == NULL) { + FREE(pathname); + return HTLoadError(sink, 403, COULD_NOT_ACCESS_DIR); + } + + /* + * Set up the output stream. + */ + _HTProgress(BUILDING_DIR_LIST); + if (UCLYhndl_HTFile_for_unspec >= 0) { + HTAnchor_setUCInfoStage(anchor, + UCLYhndl_HTFile_for_unspec, + UCT_STAGE_PARSER, + UCT_SETBY_DEFAULT); + } + target = HTML_new(anchor, format_out, sink); + targetClass = *(target->isa); + + /* + * Set up the offset string of the anchor reference, and strings for the + * title and header. 
+ */ + cp = strrchr(pathname, '/'); /* find lastslash */ + StrAllocCopy(tail, (cp + 1)); /* take slash off the beginning */ + if (*tail != '\0') { + StrAllocCopy(title, tail); + *cp = '\0'; + if ((cp1 = strrchr(pathname, '/')) != NULL && + cp1 != pathname && + StrNCmp((cp1 + 1), "000000", 6)) + StrAllocCopy(parent, (cp1 + 1)); + *cp = '/'; + } else { + pathname[strlen(pathname) - 1] = '\0'; + cp = strrchr(pathname, '/'); + StrAllocCopy(title, (cp + 1)); + pathname[strlen(pathname)] = '/'; + } + StrAllocCopy(header, pathname); + + /* + * Initialize path name for HTStat(). + */ + pathend = strlen(pathname); + if (*(pathname + pathend - 1) != '/') { + StrAllocCat(pathname, "/"); + pathend++; + } + + /* + * Output the title and header. + */ + START(HTML_HTML); + PUTC('\n'); + START(HTML_HEAD); + PUTC('\n'); + HTUnEscape(title); + START(HTML_TITLE); + PUTS(title); + PUTS(" directory"); + END(HTML_TITLE); + PUTC('\n'); + FREE(title); + END(HTML_HEAD); + PUTC('\n'); + START(HTML_BODY); + PUTC('\n'); + HTUnEscape(header); + START(HTML_H1); + PUTS(header); + END(HTML_H1); + PUTC('\n'); + if (HTDirReadme == HT_DIR_README_TOP) { + FILE *fp; + + if (header[strlen(header) - 1] != '/') + StrAllocCat(header, "/"); + StrAllocCat(header, HT_DIR_README_FILE); + if ((fp = fopen(header, "r")) != NULL) { + START(HTML_PRE); + for (;;) { + char c = fgetc(fp); + + if (c == (char) EOF) + break; +#ifdef NOTDEFINED + switch (c) { + case '&': + case '<': + case '>': + PUTC('&'); + PUTC('#'); + PUTC((char) (c / 10)); + PUTC((char) (c % 10)); + PUTC(';'); + break; + default: + PUTC(c); + } +#else + PUTC(c); +#endif /* NOTDEFINED */ + } + END(HTML_PRE); + fclose(fp); + } + } + FREE(header); + if (parent) { + HTSprintf0(&relative, "%s/..", tail); + HTStartAnchor(target, "", relative); + PUTS("Up to "); + HTUnEscape(parent); + PUTS(parent); + END(HTML_A); + START(HTML_P); + PUTC('\n'); + FREE(relative); + FREE(parent); + } + + /* + * Set up the date comparison. 
+ */ + NowTime = time(NULL); + strcpy(ThisYear, (char *) ctime(&NowTime) + 20); + ThisYear[4] = '\0'; + + /* + * Now, generate the Btree and put it out to the output stream. + */ + { + char dottest = 2; /* To avoid two strcmp() each time */ + STRUCT_DIRENT *dirbuf; + HTBTree *bt; + + /* Set up sort key and initialize BTree */ + bt = HTBTree_new((HTComparer) compare_VMSEntryInfo_structs); + + /* Build tree */ + while ((dirbuf = HTVMSreaddir(dp))) { + HTAtom *encoding = NULL; + HTFormat format; + + /* Skip if not used */ + if (!dirbuf->d_ino) { + continue; + } + + /* Current and parent directories are never shown in list */ + if (dottest && (!strcmp(dirbuf->d_name, ".") || + !strcmp(dirbuf->d_name, ".."))) { + dottest--; + continue; + } + + /* Don't show the selective enabling file + * unless version numbers are included */ + if (!strcasecomp(dirbuf->d_name, HT_DIR_ENABLE_FILE)) { + continue; + } + + /* Skip files beginning with a dot? */ + if ((no_dotfiles || !show_dotfiles) && *dirbuf->d_name == '.') { + continue; + } + + /* OK, make an lstat() and get a key ready. */ + *(pathname + pathend) = '\0'; + StrAllocCat(pathname, dirbuf->d_name); + if (HTStat(pathname, &file_info)) { + /* for VMS the failure here means the file is not readable... + we however continue to browse through the directory... 
*/ + continue; + } + entry_info = (VMSEntryInfo *) malloc(sizeof(VMSEntryInfo)); + if (entry_info == NULL) + outofmem(__FILE__, "HTVMSBrowseDir"); + entry_info->type = 0; + entry_info->size = 0; + entry_info->date = 0; + entry_info->filename = 0; + entry_info->display = TRUE; + + /* Get the type */ + format = HTFileFormat(dirbuf->d_name, &encoding, + (const char **) &cp); + if (!cp) { + if (!StrNCmp(HTAtom_name(format), "application", 11)) { + cp = HTAtom_name(format) + 12; + if (!StrNCmp(cp, "x-", 2)) + cp += 2; + } else + cp = HTAtom_name(format); + } + StrAllocCopy(entry_info->type, cp); + + StrAllocCopy(entry_info->filename, dirbuf->d_name); + if (S_ISDIR(file_info.st_mode)) { + /* strip .DIR part... */ + char *dot; + + dot = strstr(entry_info->filename, ".DIR"); + if (dot) + *dot = '\0'; + LYLowerCase(entry_info->filename); + StrAllocCopy(entry_info->type, "Directory"); + } else { + if ((cp = strstr(entry_info->filename, "READ")) == NULL) { + cp = entry_info->filename; + } else { + cp += 4; + if (!StrNCmp(cp, "ME", 2)) { + cp += 2; + while (cp && *cp && *cp != '.') { + cp++; + } + } else if (!StrNCmp(cp, ".ME", 3)) { + cp = (entry_info->filename + + strlen(entry_info->filename)); + } else { + cp = entry_info->filename; + } + } + LYLowerCase(cp); + if (((len = strlen(entry_info->filename)) > 2) && + entry_info->filename[len - 1] == 'z') { + if (entry_info->filename[len - 2] == '.' 
|| + entry_info->filename[len - 2] == '_') + entry_info->filename[len - 1] = 'Z'; + } + } + + /* Get the date */ + { + char *t = (char *) ctime((const time_t *) &file_info.st_ctime); + + *(t + 24) = '\0'; + + StrAllocCopy(entry_info->date, (t + 4)); + *((entry_info->date) + 7) = '\0'; + if ((atoi((t + 19))) < atoi(ThisYear)) + StrAllocCat(entry_info->date, (t + 19)); + else { + StrAllocCat(entry_info->date, (t + 11)); + *((entry_info->date) + 12) = '\0'; + } + } + + /* Get the size */ + if (!S_ISDIR(file_info.st_mode)) + entry_info->size = (unsigned int) file_info.st_size; + else + entry_info->size = 0; + + /* Now, update the BTree etc. */ + if (entry_info->display) { + CTRACE((tfp, "Adding file to BTree: %s\n", + entry_info->filename)); + HTBTree_add(bt, entry_info); + } + + } /* End while HTVMSreaddir() */ + + FREE(pathname); + HTVMSclosedir(dp); + + START(HTML_PRE); + /* + * Run through the BTree printing out in order + */ + { + HTBTElement *ele; + int i; + + for (ele = HTBTree_next(bt, NULL); + ele != NULL; + ele = HTBTree_next(bt, ele)) { + entry_info = (VMSEntryInfo *) HTBTree_object(ele); + + /* Output the date */ + if (entry_info->date) { + PUTS(entry_info->date); + PUTS(" "); + } else + PUTS(" * "); + + /* Output the type */ + if (entry_info->type) { + for (i = 0; entry_info->type[i] != '\0' && i < 15; i++) + PUTC(entry_info->type[i]); + for (; i < 17; i++) + PUTC(' '); + + } + + /* Output the link for the name */ + HTDirEntry(target, tail, entry_info->filename); + PUTS(entry_info->filename); + END(HTML_A); + + /* Output the size */ + if (entry_info->size) { + if (entry_info->size < 1024) + sprintf(string_buffer, " %d bytes", + entry_info->size); + else + sprintf(string_buffer, " %dKb", + entry_info->size / 1024); + PUTS(string_buffer); + } + + PUTC('\n'); /* end of this entry */ + + free_VMSEntryInfo_contents(entry_info); + } + } + + HTBTreeAndObject_free(bt); + + } /* End of both BTree loops */ + + /* + * Complete the output stream. 
+ */ + END(HTML_PRE); + PUTC('\n'); + END(HTML_BODY); + PUTC('\n'); + END(HTML_HTML); + PUTC('\n'); + FREE(tail); + FREE_TARGET; + + return HT_LOADED; + +} /* End of directory reading section */ + +/* + * Remove all versions of the given file. We assume there are no permissions + * problems, since we do this mainly for removing temporary files. + */ +int HTVMS_remove(char *filename) +{ + int code = remove(filename); /* return the first status code */ + + while (remove(filename) == 0) ; + return code; +} + +/* + * Remove all older versions of the given file. We may fail to remove some + * version due to permissions -- the loop stops either at that point, or when + * we run out of older versions to remove. + */ +void HTVMS_purge(char *filename) +{ + char *older_file = 0; + char *oldest_file = 0; + struct stat sb; + + StrAllocCopy(older_file, filename); + StrAllocCat(older_file, ";-1"); + + while (remove(older_file) == 0) ; + /* + * If we do not have any more older versions, it is safe to rename the + * current file to version #1. + */ + if (stat(older_file, &sb) != 0) { + StrAllocCopy(oldest_file, filename); + StrAllocCat(oldest_file, ";1"); + rename(older_file, oldest_file); + FREE(oldest_file); + } + + FREE(older_file); +} +#endif /* VMS */ diff --git a/WWW/Library/Implementation/HTVMSUtils.h b/WWW/Library/Implementation/HTVMSUtils.h new file mode 100644 index 00000000..d7efe8c4 --- /dev/null +++ b/WWW/Library/Implementation/HTVMSUtils.h @@ -0,0 +1,101 @@ +/* VMS specific routines + + */ + +#ifndef HTVMSUTIL_H +#define HTVMSUTIL_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#include <HTAnchor.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern BOOL HTVMSFileVersions; /* Include version numbers in listing? */ + +/* PUBLIC HTVMS_authSysPrv() + * CHECKS IF THIS PROCESS IS AUTHORIZED TO ENABLE SYSPRV + * ON ENTRY: + * No arguments. 
+ * + * ON EXIT: + * returns YES if SYSPRV is authorized + */ + extern BOOL HTVMS_authSysPrv(void); + +/* PUBLIC HTVMS_enableSysPrv() + * ENABLES SYSPRV + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * + */ + extern void HTVMS_enableSysPrv(void); + +/* PUBLIC HTVMS_disableSysPrv() + * DISABLES SYSPRV + * ON ENTRY: + * No arguments. + * + * ON EXIT: + * + */ + extern void HTVMS_disableSysPrv(void); + +/* PUBLIC HTVMS_checkAccess() + * CHECKS ACCESS TO FILE FOR CERTAIN USER + * ON ENTRY: + * FileName The file to be accessed + * UserName Name of the user to check access for + * + * ON EXIT: + * returns YES if access is allowed + * + */ + extern BOOL HTVMS_checkAccess(const char *FileName, + const char *UserName, + const char *Method); + +/* PUBLIC HTVMS_wwwName() + * CONVERTS VMS Name into WWW Name + * ON ENTRY: + * vmsname VMS file specification (NO NODE) + * + * ON EXIT: + * returns www file specification + * + * EXAMPLES: + * vmsname wwwname + * DISK$USER disk$user + * DISK$USER: /disk$user/ + * DISK$USER:[DUNS] /disk$user/duns + * DISK$USER:[DUNS.ECHO] /disk$user/duns/echo + * [DUNS] duns + * [DUNS.ECHO] duns/echo + * [DUNS.ECHO.-.TRANS] duns/echo/../trans + * [DUNS.ECHO.--.TRANS] duns/echo/../../trans + * [.DUNS] duns + * [.DUNS.ECHO] duns/echo + * [.DUNS.ECHO]TEST.COM duns/echo/test.com + * TEST.COM test.com + * + * + */ + const extern char *HTVMS_wwwName(const char *vmsname); + + extern int HTVMSBrowseDir(const char *address, + HTParentAnchor *anchor, + HTFormat format_out, + HTStream *sink); + + extern int HTVMS_remove(char *filename); + extern void HTVMS_purge(char *filename); + +#ifdef __cplusplus +} +#endif +#endif /* not HTVMSUTIL_H */ diff --git a/WWW/Library/Implementation/HTVMS_WaisProt.c b/WWW/Library/Implementation/HTVMS_WaisProt.c new file mode 100644 index 00000000..30feae62 --- /dev/null +++ b/WWW/Library/Implementation/HTVMS_WaisProt.c @@ -0,0 +1,2469 @@ +/* + * $LynxId: HTVMS_WaisProt.c,v 1.9 2010/09/24 23:51:22 tom Exp $ + * + * 
HTVMS_WAISProt.c + * + * Adaptation for Lynx by F.Macrides (macrides@sci.wfeb.edu) + * + * 31-May-1994 FM Initial version. + * + *----------------------------------------------------------------------*/ + +/* + * Routines originally from WProt.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + + * 3.26.90 Harry Morris, morris@think.com + * 3.30.90 Harry Morris + * - removed chunk code from WAISSearchAPDU, + * - added makeWAISQueryType1Query() and readWAISType1Query() which replace + * makeWAISQueryTerms() and makeWAISQueryDocs(). + * 4.11.90 HWM - generalized conditional includes (see c-dialect.h) + * - renamed makeWAISType1Query() to makeWAISTextQuery() + * renamed readWAISType1Query() to readWAISTextQuery() + * 5.29.90 TS - fixed bug in makeWAISQueryDocs + * added CSTFreeWAISFoo functions + */ + +#define _C_WAIS_protocol_ + +/* This file implements the Z39.50 extensions required for WAIS +*/ + +#include <HTUtils.h> +#include <HTVMS_WaisUI.h> +#include <HTVMS_WaisProt.h> + +#include <LYLeaks.h> + +/* very rough estimates of the size of an object */ +#define DefWAISInitResponseSize (size_t)200 +#define DefWAISSearchSize (size_t)3000 +#define DefWAISSearchResponseSize (size_t)6000 +#define DefWAISPresentSize (size_t)1000 +#define DefWAISPresentResponseSize (size_t)6000 +#define DefWAISDocHeaderSize (size_t)500 +#define DefWAISShortHeaderSize (size_t)200 +#define DefWAISLongHeaderSize (size_t)800 +#define DefWAISDocTextSize (size_t)6000 +#define DefWAISDocHeadlineSize (size_t)500 +#define DefWAISDocCodeSize (size_t)500 + +#define RESERVE_SPACE_FOR_WAIS_HEADER(len) \ + if (*len > 0) \ + *len -= header_len; + +#define S_MALLOC(type) (type*)s_malloc(sizeof(type)) +#define S_MALLOC2(type) (type*)s_malloc(sizeof(type) * 2) + +#define S_REALLOC2(type, ptr, num) (type*)s_realloc((char*)ptr, 
(sizeof(type) * (num + 2))) + +/*----------------------------------------------------------------------*/ + +static unsigned long userInfoTagSize(data_tag tag, + unsigned long length) +/* return the number of bytes required to write the user info tag and + length + */ +{ + unsigned long size; + + /* calculate bytes required to represent tag. max tag is 16K */ + size = writtenCompressedIntSize(tag); + size += writtenCompressedIntSize(length); + + return (size); +} + +/*----------------------------------------------------------------------*/ + +static char *writeUserInfoHeader(data_tag tag, + long infoSize, + long estHeaderSize, + char *buffer, + long *len) +/* write the tag and size, making sure the info fits. return the true end + of the info (after adjustment) note that the argument infoSize includes + estHeaderSize. Note that the argument len is the number of bytes remaining + in the buffer. Since we write the tag and size at the begining of the + buffer (in space that we reserved) we don't want to pass len the calls which + do that writing. + */ +{ + long dummyLen = 100; /* plenty of space for a tag and size */ + char *buf = buffer; + long realSize = infoSize - estHeaderSize; + long realHeaderSize = userInfoTagSize(tag, realSize); + + if (buffer == NULL || *len == 0) + return (NULL); + + /* write the tag */ + buf = writeTag(tag, buf, &dummyLen); + + /* see if the if the header size was correct. 
if not, + we have to shift the info to fit the real header size */ + if (estHeaderSize != realHeaderSize) { /* make sure there is enough space */ + CHECK_FOR_SPACE_LEFT(realHeaderSize - estHeaderSize, len); + memmove(buffer + realHeaderSize, buffer + estHeaderSize, (size_t) (realSize)); + } + + /* write the size */ + writeCompressedInteger(realSize, buf, &dummyLen); + + /* return the true end of buffer */ + return (buffer + realHeaderSize + realSize); +} + +/*----------------------------------------------------------------------*/ + +static char *readUserInfoHeader(data_tag *tag, + unsigned long *num, + char *buffer) +/* read the tag and size */ +{ + char *buf = buffer; + + buf = readTag(tag, buf); + buf = readCompressedInteger(num, buf); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISInitResponse *makeWAISInitResponse(long chunkCode, + long chunkIDLen, + char *chunkMarker, + char *highlightMarker, + char *deHighlightMarker, + char *newLineChars) +/* create a WAIS init response object */ +{ + WAISInitResponse *init = S_MALLOC(WAISInitResponse); + + init->ChunkCode = chunkCode; /* note: none are copied! 
*/ + init->ChunkIDLength = chunkIDLen; + init->ChunkMarker = chunkMarker; + init->HighlightMarker = highlightMarker; + init->DeHighlightMarker = deHighlightMarker; + init->NewlineCharacters = newLineChars; + + return (init); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISInitResponse(WAISInitResponse *init) +/* free an object made with makeWAISInitResponse */ +{ + s_free(init->ChunkMarker); + s_free(init->HighlightMarker); + s_free(init->DeHighlightMarker); + s_free(init->NewlineCharacters); + s_free(init); +} + +/*----------------------------------------------------------------------*/ + +char *writeInitResponseInfo(InitResponseAPDU *init, + char *buffer, + long *len) +/* write an init response object */ +{ + unsigned long header_len = userInfoTagSize(DT_UserInformationLength, + DefWAISInitResponseSize); + char *buf = buffer + header_len; + WAISInitResponse *info = (WAISInitResponse *) init->UserInformationField; + unsigned long size; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeNum(info->ChunkCode, DT_ChunkCode, buf, len); + buf = writeNum(info->ChunkIDLength, DT_ChunkIDLength, buf, len); + buf = writeString(info->ChunkMarker, DT_ChunkMarker, buf, len); + buf = writeString(info->HighlightMarker, DT_HighlightMarker, buf, len); + buf = writeString(info->DeHighlightMarker, DT_DeHighlightMarker, buf, len); + buf = writeString(info->NewlineCharacters, DT_NewlineCharacters, buf, len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_UserInformationLength, + size, + header_len, + buffer, + len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readInitResponseInfo(void **info, + char *buffer) +/* read an init response object */ +{ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + long chunkCode, chunkIDLen; + data_tag tag1; + char *chunkMarker = NULL; + char *highlightMarker = NULL; + 
char *deHighlightMarker = NULL; + char *newLineChars = NULL; + + chunkCode = chunkIDLen = UNUSED; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_ChunkCode: + buf = readNum(&chunkCode, buf); + break; + case DT_ChunkIDLength: + buf = readNum(&chunkIDLen, buf); + break; + case DT_ChunkMarker: + buf = readString(&chunkMarker, buf); + break; + case DT_HighlightMarker: + buf = readString(&highlightMarker, buf); + break; + case DT_DeHighlightMarker: + buf = readString(&deHighlightMarker, buf); + break; + case DT_NewlineCharacters: + buf = readString(&newLineChars, buf); + break; + default: + s_free(highlightMarker); + s_free(deHighlightMarker); + s_free(newLineChars); + REPORT_READ_ERROR(buf); + break; + } + } + + *info = (void *) makeWAISInitResponse(chunkCode, chunkIDLen, chunkMarker, + highlightMarker, deHighlightMarker, + newLineChars); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISSearch *makeWAISSearch(char *seedWords, + DocObj **docs, + char **textList, + long dateFactor, + char *beginDateRange, + char *endDateRange, + long maxDocsRetrieved) + +/* create a type 3 query object */ +{ + WAISSearch *query = S_MALLOC(WAISSearch); + + query->SeedWords = seedWords; /* not copied! */ + query->Docs = docs; /* not copied! */ + query->TextList = textList; /* not copied! 
*/ + query->DateFactor = dateFactor; + query->BeginDateRange = beginDateRange; + query->EndDateRange = endDateRange; + query->MaxDocumentsRetrieved = maxDocsRetrieved; + + return (query); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISSearch(WAISSearch *query) + +/* destroy an object made with makeWAISSearch() */ +{ + void *ptr = NULL; + long i; + + s_free(query->SeedWords); + + if (query->Docs != NULL) + for (i = 0, ptr = (void *) query->Docs[i]; + ptr != NULL; + ptr = (void *) query->Docs[++i]) + freeDocObj((DocObj *) ptr); + s_free(query->Docs); + + if (query->TextList != NULL) /* XXX revisit when textlist is fully defined */ + for (i = 0, ptr = (void *) query->TextList[i]; + ptr != NULL; + ptr = (void *) query->TextList[++i]) + s_free(ptr); + s_free(query->TextList); + + s_free(query->BeginDateRange); + s_free(query->EndDateRange); + s_free(query); +} + +/*----------------------------------------------------------------------*/ + +DocObj *makeDocObjUsingWholeDocument(any *docID, + char *type) + +/* construct a document object using byte chunks - only for use by + servers */ +{ + DocObj *doc = S_MALLOC(DocObj); + + doc->DocumentID = docID; /* not copied! */ + doc->Type = type; /* not copied! */ + doc->ChunkCode = CT_document; + return (doc); +} + +/*----------------------------------------------------------------------*/ + +DocObj *makeDocObjUsingLines(any *docID, + char *type, + long start, + long end) + +/* construct a document object using line chunks - only for use by + servers */ +{ + DocObj *doc = S_MALLOC(DocObj); + + doc->ChunkCode = CT_line; + doc->DocumentID = docID; /* not copied */ + doc->Type = type; /* not copied! 
*/ + doc->ChunkStart.Pos = start; + doc->ChunkEnd.Pos = end; + return (doc); +} + +/*----------------------------------------------------------------------*/ + +DocObj *makeDocObjUsingBytes(any *docID, + char *type, + long start, + long end) + +/* construct a document object using byte chunks - only for use by + servers */ +{ + DocObj *doc = S_MALLOC(DocObj); + + doc->ChunkCode = CT_byte; + doc->DocumentID = docID; /* not copied */ + doc->Type = type; /* not copied! */ + doc->ChunkStart.Pos = start; + doc->ChunkEnd.Pos = end; + return (doc); +} + +/*----------------------------------------------------------------------*/ + +DocObj *makeDocObjUsingParagraphs(any *docID, + char *type, + any *start, + any *end) + +/* construct a document object using byte chunks - only for use by + servers */ +{ + DocObj *doc = S_MALLOC(DocObj); + + doc->ChunkCode = CT_paragraph; + doc->DocumentID = docID; /* not copied */ + doc->Type = type; + doc->ChunkStart.ID = start; + doc->ChunkEnd.ID = end; + return (doc); +} + +/*----------------------------------------------------------------------*/ + +void freeDocObj(DocObj *doc) + +/* free a docObj */ +{ + freeAny(doc->DocumentID); + s_free(doc->Type); + if (doc->ChunkCode == CT_paragraph) { + freeAny(doc->ChunkStart.ID); + freeAny(doc->ChunkEnd.ID); + } + s_free(doc); +} + +/*----------------------------------------------------------------------*/ + +static char *writeDocObj(DocObj *doc, + char *buffer, + long *len) + +/* write as little as we can about the doc obj */ +{ + char *buf = buffer; + + /* we alwasy have to write the id, but its tag depends on if its a chunk */ + if (doc->ChunkCode == CT_document) + buf = writeAny(doc->DocumentID, DT_DocumentID, buf, len); + else + buf = writeAny(doc->DocumentID, DT_DocumentIDChunk, buf, len); + + if (doc->Type != NULL) + buf = writeString(doc->Type, DT_TYPE, buf, len); + + switch (doc->ChunkCode) { + case CT_document: + /* do nothing - there is no chunk data */ + break; + case CT_byte: + case 
CT_line: + buf = writeNum(doc->ChunkCode, DT_ChunkCode, buf, len); + buf = writeNum(doc->ChunkStart.Pos, DT_ChunkStartID, buf, len); + buf = writeNum(doc->ChunkEnd.Pos, DT_ChunkEndID, buf, len); + break; + case CT_paragraph: + buf = writeNum(doc->ChunkCode, DT_ChunkCode, buf, len); + buf = writeAny(doc->ChunkStart.ID, DT_ChunkStartID, buf, len); + buf = writeAny(doc->ChunkEnd.ID, DT_ChunkEndID, buf, len); + break; + default: + panic("Implementation error: unknown chuck type %ld", + doc->ChunkCode); + break; + } + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +static char *readDocObj(DocObj **doc, + char *buffer) + +/* read whatever we have about the new document */ +{ + char *buf = buffer; + data_tag tag; + + *doc = S_MALLOC(DocObj); + + tag = peekTag(buf); + buf = readAny(&((*doc)->DocumentID), buf); + + if (tag == DT_DocumentID) { + (*doc)->ChunkCode = CT_document; + tag = peekTag(buf); + if (tag == DT_TYPE) /* XXX depends on DT_TYPE != what comes next */ + buf = readString(&((*doc)->Type), buf); + /* ChunkStart and ChunkEnd are undefined */ + } else if (tag == DT_DocumentIDChunk) { + boolean readParagraphs = false; /* for cleanup */ + + tag = peekTag(buf); + if (tag == DT_TYPE) /* XXX depends on DT_TYPE != CT_FOO */ + buf = readString(&((*doc)->Type), buf); + buf = readNum(&((*doc)->ChunkCode), buf); + switch ((*doc)->ChunkCode) { + case CT_byte: + case CT_line: + buf = readNum(&((*doc)->ChunkStart.Pos), buf); + buf = readNum(&((*doc)->ChunkEnd.Pos), buf); + break; + case CT_paragraph: + buf = readAny(&((*doc)->ChunkStart.ID), buf); + buf = readAny(&((*doc)->ChunkEnd.ID), buf); + break; + default: + freeAny((*doc)->DocumentID); + if (readParagraphs) { + freeAny((*doc)->ChunkStart.ID); + freeAny((*doc)->ChunkEnd.ID); + } + s_free(doc); + REPORT_READ_ERROR(buf); + break; + } + } else { + freeAny((*doc)->DocumentID); + s_free(*doc); + REPORT_READ_ERROR(buf); + } + return (buf); +} + 
+/*----------------------------------------------------------------------*/ + +char *writeSearchInfo(SearchAPDU *query, + char *buffer, + long *len) + +/* write out a WAIS query (type 1 or 3) */ +{ + if (strcmp(query->QueryType, QT_TextRetrievalQuery) == 0) { + return (writeAny((any *) query->Query, DT_Query, buffer, len)); + } else { + unsigned long header_len = userInfoTagSize(DT_UserInformationLength, + DefWAISSearchSize); + char *buf = buffer + header_len; + WAISSearch *info = (WAISSearch *) query->Query; + unsigned long size; + long i; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeString(info->SeedWords, DT_SeedWords, buf, len); + + if (info->Docs != NULL) { + for (i = 0; info->Docs[i] != NULL; i++) { + buf = writeDocObj(info->Docs[i], buf, len); + } + } + + /* XXX text list */ + + buf = writeNum(info->DateFactor, + DT_DateFactor, + buf, + len); + buf = writeString(info->BeginDateRange, + DT_BeginDateRange, + buf, + len); + buf = writeString(info->EndDateRange, + DT_EndDateRange, + buf, + len); + buf = writeNum(info->MaxDocumentsRetrieved, + DT_MaxDocumentsRetrieved, + buf, + len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_UserInformationLength, + size, + header_len, + buffer, + len); + + return (buf); + } +} + +/*----------------------------------------------------------------------*/ + +char *readSearchInfo(void **info, + char *buffer) + +/* read a WAIS query (type 1 or 3) */ +{ + data_tag type = peekTag(buffer); + + if (type == DT_Query) /* this is a type 1 query */ + { + char *buf = buffer; + any *query = NULL; + + buf = readAny(&query, buf); + *info = (void *) query; + return (buf); + } else { /* a type 3 query */ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + data_tag tag1; + char *seedWords = NULL; + char *beginDateRange = NULL; + char *endDateRange = NULL; + long dateFactor, maxDocsRetrieved; + char **textList = NULL; + DocObj **docIDs = NULL; + DocObj *doc = NULL; 
+ long docs = 0; + long i; + void *ptr = NULL; + + dateFactor = maxDocsRetrieved = UNUSED; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_SeedWords: + buf = readString(&seedWords, buf); + break; + case DT_DocumentID: + case DT_DocumentIDChunk: + if (docIDs == NULL) /* create a new doc list */ + { + docIDs = S_MALLOC2(DocObj *); + } else { /* grow the doc list */ + docIDs = S_REALLOC2(DocObj *, docIDs, docs); + } + buf = readDocObj(&doc, buf); + if (buf == NULL) { + s_free(seedWords); + s_free(beginDateRange); + s_free(endDateRange); + if (docIDs != NULL) + for (i = 0, ptr = (void *) docIDs[i]; + ptr != NULL; + ptr = (void *) docIDs[++i]) + freeDocObj((DocObj *) ptr); + s_free(docIDs); + /* XXX should also free textlist when it is fully defined */ + } + RETURN_ON_NULL(buf); + docIDs[docs++] = doc; /* put it in the list */ + docIDs[docs] = NULL; + break; + case DT_TextList: + /* XXX */ + break; + case DT_DateFactor: + buf = readNum(&dateFactor, buf); + break; + case DT_BeginDateRange: + buf = readString(&beginDateRange, buf); + break; + case DT_EndDateRange: + buf = readString(&endDateRange, buf); + break; + case DT_MaxDocumentsRetrieved: + buf = readNum(&maxDocsRetrieved, buf); + break; + default: + s_free(seedWords); + s_free(beginDateRange); + s_free(endDateRange); + if (docIDs != NULL) + for (i = 0, ptr = (void *) docIDs[i]; + ptr != NULL; + ptr = (void *) docIDs[++i]) + freeDocObj((DocObj *) ptr); + s_free(docIDs); + /* XXX should also free textlist when it is fully defined */ + REPORT_READ_ERROR(buf); + break; + } + } + + *info = (void *) makeWAISSearch(seedWords, docIDs, textList, + dateFactor, beginDateRange, endDateRange, + maxDocsRetrieved); + return (buf); + } +} + +/*----------------------------------------------------------------------*/ + +WAISDocumentHeader *makeWAISDocumentHeader(any *docID, + long versionNumber, 
+ long score, + long bestMatch, + long docLen, + long lines, + char **types, + char *source, + char *date, + char *headline, + char *originCity) + +/* construct a standard document header, note that no fields are copied! + if the application needs to save these fields, it should copy them, + or set the field in this object to NULL before freeing it. + */ +{ + WAISDocumentHeader *header = S_MALLOC(WAISDocumentHeader); + + header->DocumentID = docID; + header->VersionNumber = versionNumber; + header->Score = score; + header->BestMatch = bestMatch; + header->DocumentLength = docLen; + header->Lines = lines; + header->Types = types; + header->Source = source; + header->Date = date; + header->Headline = headline; + header->OriginCity = originCity; + + return (header); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentHeader(WAISDocumentHeader *header) +{ + freeAny(header->DocumentID); + doList((void **) header->Types, fs_free); /* can't use the macro here ! 
*/ + s_free(header->Types); + s_free(header->Source); + s_free(header->Date); + s_free(header->Headline); + s_free(header->OriginCity); + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentHeader(WAISDocumentHeader *header, char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentHeaderGroup, + DefWAISDocHeaderSize); + char *buf = buffer + header_len; + unsigned long size1; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(header->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(header->VersionNumber, DT_VersionNumber, buf, len); + buf = writeNum(header->Score, DT_Score, buf, len); + buf = writeNum(header->BestMatch, DT_BestMatch, buf, len); + buf = writeNum(header->DocumentLength, DT_DocumentLength, buf, len); + buf = writeNum(header->Lines, DT_Lines, buf, len); + if (header->Types != NULL) { + long size; + char *ptr = NULL; + long i; + + buf = writeTag(DT_TYPE_BLOCK, buf, len); + for (i = 0, size = 0, ptr = header->Types[i]; + ptr != NULL; + ptr = header->Types[++i]) { + long typeSize = strlen(ptr); + + size += writtenTagSize(DT_TYPE); + size += writtenCompressedIntSize(typeSize); + size += typeSize; + } + buf = writeCompressedInteger((unsigned long) size, buf, len); + for (i = 0, ptr = header->Types[i]; ptr != NULL; ptr = header->Types[++i]) + buf = writeString(ptr, DT_TYPE, buf, len); + } + buf = writeString(header->Source, DT_Source, buf, len); + buf = writeString(header->Date, DT_Date, buf, len); + buf = writeString(header->Headline, DT_Headline, buf, len); + buf = writeString(header->OriginCity, DT_OriginCity, buf, len); + + /* now write the header and size */ + size1 = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentHeaderGroup, + size1, + header_len, + buffer, + len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentHeader(WAISDocumentHeader **header, char 
*buffer) +{ + char *buf = buffer; + unsigned long size1; + unsigned long headerSize; + data_tag tag1; + any *docID = NULL; + long versionNumber, score, bestMatch, docLength, lines; + char **types = NULL; + char *source = NULL; + char *date = NULL; + char *headline = NULL; + char *originCity = NULL; + + versionNumber = score = bestMatch = docLength = lines = UNUSED; + + buf = readUserInfoHeader(&tag1, &size1, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size1 + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_Score: + buf = readNum(&score, buf); + break; + case DT_BestMatch: + buf = readNum(&bestMatch, buf); + break; + case DT_DocumentLength: + buf = readNum(&docLength, buf); + break; + case DT_Lines: + buf = readNum(&lines, buf); + break; + case DT_TYPE_BLOCK: + { + unsigned long size = -1; + long numTypes = 0; + + buf = readTag(&tag, buf); + buf = readCompressedInteger(&size, buf); + while (size > 0) { + char *type = NULL; + char *originalBuf = buf; + + buf = readString(&type, buf); + types = S_REALLOC2(char *, types, numTypes); + + types[numTypes++] = type; + types[numTypes] = NULL; + size -= (buf - originalBuf); + } + } + /* FALLTHRU */ + case DT_Source: + buf = readString(&source, buf); + break; + case DT_Date: + buf = readString(&date, buf); + break; + case DT_Headline: + buf = readString(&headline, buf); + break; + case DT_OriginCity: + buf = readString(&originCity, buf); + break; + default: + freeAny(docID); + s_free(source); + s_free(date); + s_free(headline); + s_free(originCity); + REPORT_READ_ERROR(buf); + break; + } + } + + *header = makeWAISDocumentHeader(docID, versionNumber, score, bestMatch, + docLength, lines, types, source, date, headline, + originCity); + return (buf); +} + +/*----------------------------------------------------------------------*/ + 
+WAISDocumentShortHeader *makeWAISDocumentShortHeader(any *docID, + long versionNumber, + long score, + long bestMatch, + long docLen, + long lines) +/* construct a short document header, note that no fields are copied! + if the application needs to save these fields, it should copy them, + or set the field in this object to NULL before freeing it. + */ +{ + WAISDocumentShortHeader *header = S_MALLOC(WAISDocumentShortHeader); + + header->DocumentID = docID; + header->VersionNumber = versionNumber; + header->Score = score; + header->BestMatch = bestMatch; + header->DocumentLength = docLen; + header->Lines = lines; + + return (header); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentShortHeader(WAISDocumentShortHeader *header) +{ + freeAny(header->DocumentID); + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentShortHeader(WAISDocumentShortHeader *header, char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentShortHeaderGroup, + DefWAISShortHeaderSize); + char *buf = buffer + header_len; + unsigned long size; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(header->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(header->VersionNumber, DT_VersionNumber, buf, len); + buf = writeNum(header->Score, DT_Score, buf, len); + buf = writeNum(header->BestMatch, DT_BestMatch, buf, len); + buf = writeNum(header->DocumentLength, DT_DocumentLength, buf, len); + buf = writeNum(header->Lines, DT_Lines, buf, len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentShortHeaderGroup, + size, + header_len, + buffer, + len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentShortHeader(WAISDocumentShortHeader **header, char *buffer) +{ + char *buf = buffer; + unsigned long size; + unsigned 
long headerSize; + data_tag tag1; + any *docID = NULL; + long versionNumber, score, bestMatch, docLength, lines; + + versionNumber = score = bestMatch = docLength = lines = UNUSED; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_Score: + buf = readNum(&score, buf); + break; + case DT_BestMatch: + buf = readNum(&bestMatch, buf); + break; + case DT_DocumentLength: + buf = readNum(&docLength, buf); + break; + case DT_Lines: + buf = readNum(&lines, buf); + break; + default: + freeAny(docID); + REPORT_READ_ERROR(buf); + break; + } + } + + *header = makeWAISDocumentShortHeader(docID, versionNumber, score, bestMatch, + docLength, lines); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISDocumentLongHeader *makeWAISDocumentLongHeader(any *docID, + long versionNumber, + long score, + long bestMatch, + long docLen, + long lines, + char **types, + char *source, + char *date, + char *headline, + char *originCity, + char *stockCodes, + char *companyCodes, + char *industryCodes) +/* construct a long document header, note that no fields are copied! + if the application needs to save these fields, it should copy them, + or set the field in this object to NULL before freeing it. 
+ */ +{ + WAISDocumentLongHeader *header = S_MALLOC(WAISDocumentLongHeader); + + header->DocumentID = docID; + header->VersionNumber = versionNumber; + header->Score = score; + header->BestMatch = bestMatch; + header->DocumentLength = docLen; + header->Lines = lines; + header->Types = types; + header->Source = source; + header->Date = date; + header->Headline = headline; + header->OriginCity = originCity; + header->StockCodes = stockCodes; + header->CompanyCodes = companyCodes; + header->IndustryCodes = industryCodes; + + return (header); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentLongHeader(WAISDocumentLongHeader *header) +{ + freeAny(header->DocumentID); + doList((void **) header->Types, fs_free); /* can't use the macro here! */ + s_free(header->Source); + s_free(header->Date); + s_free(header->Headline); + s_free(header->OriginCity); + s_free(header->StockCodes); + s_free(header->CompanyCodes); + s_free(header->IndustryCodes); + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentLongHeader(WAISDocumentLongHeader *header, char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentLongHeaderGroup, + DefWAISLongHeaderSize); + char *buf = buffer + header_len; + unsigned long size1; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(header->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(header->VersionNumber, DT_VersionNumber, buf, len); + buf = writeNum(header->Score, DT_Score, buf, len); + buf = writeNum(header->BestMatch, DT_BestMatch, buf, len); + buf = writeNum(header->DocumentLength, DT_DocumentLength, buf, len); + buf = writeNum(header->Lines, DT_Lines, buf, len); + if (header->Types != NULL) { + long size; + char *ptr = NULL; + long i; + + buf = writeTag(DT_TYPE_BLOCK, buf, len); + for (i = 0, size = 0, ptr = header->Types[i]; + ptr != NULL; + ptr = header->Types[++i]) { + long 
typeSize = strlen(ptr); + + size += writtenTagSize(DT_TYPE); + size += writtenCompressedIntSize(typeSize); + size += typeSize; + } + buf = writeCompressedInteger((unsigned long) size, buf, len); + for (i = 0, ptr = header->Types[i]; ptr != NULL; ptr = header->Types[++i]) + buf = writeString(ptr, DT_TYPE, buf, len); + } + buf = writeString(header->Source, DT_Source, buf, len); + buf = writeString(header->Date, DT_Date, buf, len); + buf = writeString(header->Headline, DT_Headline, buf, len); + buf = writeString(header->OriginCity, DT_OriginCity, buf, len); + buf = writeString(header->StockCodes, DT_StockCodes, buf, len); + buf = writeString(header->CompanyCodes, DT_CompanyCodes, buf, len); + buf = writeString(header->IndustryCodes, DT_IndustryCodes, buf, len); + + /* now write the header and size */ + size1 = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentLongHeaderGroup, + size1, + header_len, + buffer, + len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentLongHeader(WAISDocumentLongHeader **header, char *buffer) +{ + char *buf = buffer; + unsigned long size1; + unsigned long headerSize; + data_tag tag1; + any *docID; + long versionNumber, score, bestMatch, docLength, lines; + char **types; + char *source, *date, *headline, *originCity, *stockCodes, *companyCodes, *industryCodes; + + docID = NULL; + versionNumber = + score = + bestMatch = + docLength = + lines = UNUSED; + types = NULL; + source = + date = + headline = + originCity = + stockCodes = + companyCodes = + industryCodes = NULL; + + buf = readUserInfoHeader(&tag1, &size1, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size1 + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_Score: + buf = readNum(&score, buf); + break; + case DT_BestMatch: + buf = 
readNum(&bestMatch, buf); + break; + case DT_DocumentLength: + buf = readNum(&docLength, buf); + break; + case DT_Lines: + buf = readNum(&lines, buf); + break; + case DT_TYPE_BLOCK: + { + unsigned long size = -1; + long numTypes = 0; + + buf = readTag(&tag, buf); + readCompressedInteger(&size, buf); + while (size > 0) { + char *type = NULL; + char *originalBuf = buf; + + buf = readString(&type, buf); + types = S_REALLOC2(char *, types, numTypes); + + types[numTypes++] = type; + types[numTypes] = NULL; + size -= (buf - originalBuf); + } + } + /* FALLTHRU */ + case DT_Source: + buf = readString(&source, buf); + break; + case DT_Date: + buf = readString(&date, buf); + break; + case DT_Headline: + buf = readString(&headline, buf); + break; + case DT_OriginCity: + buf = readString(&originCity, buf); + break; + case DT_StockCodes: + buf = readString(&stockCodes, buf); + break; + case DT_CompanyCodes: + buf = readString(&companyCodes, buf); + break; + case DT_IndustryCodes: + buf = readString(&industryCodes, buf); + break; + default: + freeAny(docID); + s_free(source); + s_free(date); + s_free(headline); + s_free(originCity); + s_free(stockCodes); + s_free(companyCodes); + s_free(industryCodes); + REPORT_READ_ERROR(buf); + break; + } + } + + *header = makeWAISDocumentLongHeader(docID, + versionNumber, + score, + bestMatch, + docLength, + lines, + types, + source, + date, + headline, + originCity, + stockCodes, + companyCodes, + industryCodes); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISSearchResponse * + makeWAISSearchResponse( + char *seedWordsUsed, + WAISDocumentHeader **docHeaders, + WAISDocumentShortHeader **shortHeaders, + WAISDocumentLongHeader **longHeaders, + WAISDocumentText **text, + WAISDocumentHeadlines **headlines, + WAISDocumentCodes **codes, + diagnosticRecord ** diagnostics) +{ + WAISSearchResponse *response = S_MALLOC(WAISSearchResponse); + + response->SeedWordsUsed = seedWordsUsed; + 
response->DocHeaders = docHeaders; + response->ShortHeaders = shortHeaders; + response->LongHeaders = longHeaders; + response->Text = text; + response->Headlines = headlines; + response->Codes = codes; + response->Diagnostics = diagnostics; + + return (response); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISSearchResponse(WAISSearchResponse * response) +{ + void *ptr = NULL; + long i; + + s_free(response->SeedWordsUsed); + + if (response->DocHeaders != NULL) + for (i = 0, ptr = (void *) response->DocHeaders[i]; + ptr != NULL; + ptr = (void *) response->DocHeaders[++i]) + freeWAISDocumentHeader((WAISDocumentHeader *) ptr); + s_free(response->DocHeaders); + + if (response->ShortHeaders != NULL) + for (i = 0, ptr = (void *) response->ShortHeaders[i]; + ptr != NULL; + ptr = (void *) response->ShortHeaders[++i]) + freeWAISDocumentShortHeader((WAISDocumentShortHeader *) ptr); + s_free(response->ShortHeaders); + + if (response->LongHeaders != NULL) + for (i = 0, ptr = (void *) response->LongHeaders[i]; + ptr != NULL; + ptr = (void *) response->LongHeaders[++i]) + freeWAISDocumentLongHeader((WAISDocumentLongHeader *) ptr); + s_free(response->LongHeaders); + + if (response->Text != NULL) + for (i = 0, ptr = (void *) response->Text[i]; + ptr != NULL; + ptr = (void *) response->Text[++i]) + freeWAISDocumentText((WAISDocumentText *) ptr); + s_free(response->Text); + + if (response->Headlines != NULL) + for (i = 0, ptr = (void *) response->Headlines[i]; + ptr != NULL; + ptr = (void *) response->Headlines[++i]) + freeWAISDocumentHeadlines((WAISDocumentHeadlines *) ptr); + s_free(response->Headlines); + + if (response->Codes != NULL) + for (i = 0, ptr = (void *) response->Codes[i]; + ptr != NULL; + ptr = (void *) response->Codes[++i]) + freeWAISDocumentCodes((WAISDocumentCodes *) ptr); + s_free(response->Codes); + + if (response->Diagnostics != NULL) + for (i = 0, ptr = (void *) response->Diagnostics[i]; + ptr != NULL; + ptr = 
(void *) response->Diagnostics[++i]) + freeDiag((diagnosticRecord *) ptr); + s_free(response->Diagnostics); + + s_free(response); +} + +/*----------------------------------------------------------------------*/ + +char *writeSearchResponseInfo(SearchResponseAPDU *query, + char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_UserInformationLength, + DefWAISSearchResponseSize); + char *buf = buffer + header_len; + WAISSearchResponse *info = (WAISSearchResponse *) query->DatabaseDiagnosticRecords; + unsigned long size; + void *header = NULL; + long i; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeString(info->SeedWordsUsed, DT_SeedWordsUsed, buf, len); + + /* write out all the headers */ + if (info->DocHeaders != NULL) { + for (i = 0, header = (void *) info->DocHeaders[i]; + header != NULL; + header = (void *) info->DocHeaders[++i]) + buf = writeWAISDocumentHeader((WAISDocumentHeader *) header, buf, len); + } + + if (info->ShortHeaders != NULL) { + for (i = 0, header = (void *) info->ShortHeaders[i]; + header != NULL; + header = (void *) info->ShortHeaders[++i]) + buf = writeWAISDocumentShortHeader((WAISDocumentShortHeader *) header, + buf, + len); + } + + if (info->LongHeaders != NULL) { + for (i = 0, header = (void *) info->LongHeaders[i]; + header != NULL; + header = (void *) info->LongHeaders[++i]) + buf = writeWAISDocumentLongHeader((WAISDocumentLongHeader *) header, + buf, + len); + } + + if (info->Text != NULL) { + for (i = 0, header = (void *) info->Text[i]; + header != NULL; + header = (void *) info->Text[++i]) + buf = writeWAISDocumentText((WAISDocumentText *) header, buf, len); + } + + if (info->Headlines != NULL) { + for (i = 0, header = (void *) info->Headlines[i]; + header != NULL; + header = (void *) info->Headlines[++i]) + buf = writeWAISDocumentHeadlines((WAISDocumentHeadlines *) header, + buf, + len); + } + + if (info->Codes != NULL) { + for (i = 0, header = (void *) info->Codes[i]; + header != NULL; + header = (void 
*) info->Codes[++i]) + buf = writeWAISDocumentCodes((WAISDocumentCodes *) header, buf, len); + } + + if (info->Diagnostics != NULL) { + for (i = 0, header = (void *) info->Diagnostics[i]; + header != NULL; + header = (void *) info->Diagnostics[++i]) + buf = writeDiag((diagnosticRecord *) header, buf, len); + } + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_UserInformationLength, + size, + header_len, + buffer, + len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +static void cleanUpWaisSearchResponse(char *buf, + char *seedWordsUsed, + WAISDocumentHeader **docHeaders, + WAISDocumentShortHeader **shortHeaders, + WAISDocumentLongHeader **longHeaders, + WAISDocumentText **text, + WAISDocumentHeadlines **headlines, + WAISDocumentCodes **codes, + diagnosticRecord ** diags) +/* if buf is NULL, we have just gotten a read error, and need to clean up + any state we have built. If not, then everything is going fine, and + we should just hang loose + */ +{ + void *ptr = NULL; + long i; + + if (buf == NULL) { + s_free(seedWordsUsed); + if (docHeaders != NULL) + for (i = 0, ptr = (void *) docHeaders[i]; ptr != NULL; + ptr = (void *) docHeaders[++i]) + freeWAISDocumentHeader((WAISDocumentHeader *) ptr); + s_free(docHeaders); + if (shortHeaders != NULL) + for (i = 0, ptr = (void *) shortHeaders[i]; ptr != NULL; + ptr = (void *) shortHeaders[++i]) + freeWAISDocumentShortHeader((WAISDocumentShortHeader *) ptr); + s_free(shortHeaders); + if (longHeaders != NULL) + for (i = 0, ptr = (void *) longHeaders[i]; ptr != NULL; + ptr = (void *) longHeaders[++i]) + freeWAISDocumentLongHeader((WAISDocumentLongHeader *) ptr); + s_free(longHeaders); + if (text != NULL) + for (i = 0, ptr = (void *) text[i]; + ptr != NULL; + ptr = (void *) text[++i]) + freeWAISDocumentText((WAISDocumentText *) ptr); + s_free(text); + if (headlines != NULL) + for (i = 0, ptr = (void *) headlines[i]; ptr != NULL; + 
ptr = (void *) headlines[++i]) + freeWAISDocumentHeadlines((WAISDocumentHeadlines *) ptr); + s_free(headlines); + if (codes != NULL) + for (i = 0, ptr = (void *) codes[i]; ptr != NULL; + ptr = (void *) codes[++i]) + freeWAISDocumentCodes((WAISDocumentCodes *) ptr); + s_free(codes); + if (diags != NULL) + for (i = 0, ptr = (void *) diags[i]; ptr != NULL; + ptr = (void *) diags[++i]) + freeDiag((diagnosticRecord *) ptr); + s_free(diags); + } +} + +/*----------------------------------------------------------------------*/ + +char *readSearchResponseInfo(void **info, + char *buffer) +{ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + data_tag tag1; + void *header = NULL; + WAISDocumentHeader **docHeaders = NULL; + WAISDocumentShortHeader **shortHeaders = NULL; + WAISDocumentLongHeader **longHeaders = NULL; + WAISDocumentText **text = NULL; + WAISDocumentHeadlines **headlines = NULL; + WAISDocumentCodes **codes = NULL; + long numDocHeaders, numLongHeaders, numShortHeaders, numText, numHeadlines; + long numCodes; + char *seedWordsUsed = NULL; + diagnosticRecord **diags = NULL; + diagnosticRecord *diag = NULL; + long numDiags = 0; + + numDocHeaders = + numLongHeaders = + numShortHeaders = + numText = + numHeadlines = + numCodes = 0; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_SeedWordsUsed: + buf = readString(&seedWordsUsed, buf); + break; + case DT_DatabaseDiagnosticRecords: + if (diags == NULL) /* create a new diag list */ + { + diags = S_MALLOC2(diagnosticRecord *); + } else { /* grow the diag list */ + diags = S_REALLOC2(diagnosticRecord *, diags, numDiags); + } + buf = readDiag(&diag, buf); + diags[numDiags++] = diag; /* put it in the list */ + diags[numDiags] = NULL; + break; + case DT_DocumentHeaderGroup: + if (docHeaders == NULL) /* create a new header list */ + { + docHeaders = 
S_MALLOC2(WAISDocumentHeader *); + } else { /* grow the doc list */ + docHeaders = S_REALLOC2(WAISDocumentHeader *, docHeaders, numDocHeaders); + } + buf = readWAISDocumentHeader((WAISDocumentHeader **) &header, buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + docHeaders[numDocHeaders++] = + (WAISDocumentHeader *) header; /* put it in the list */ + docHeaders[numDocHeaders] = NULL; + break; + case DT_DocumentShortHeaderGroup: + if (shortHeaders == NULL) /* create a new header list */ + { + shortHeaders = S_MALLOC2(WAISDocumentShortHeader *); + } else { /* grow the doc list */ + shortHeaders = S_REALLOC2(WAISDocumentShortHeader *, + shortHeaders, + numShortHeaders); + } + buf = readWAISDocumentShortHeader((WAISDocumentShortHeader **) &header, + buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + shortHeaders[numShortHeaders++] = + (WAISDocumentShortHeader *) header; /* put it in the list */ + shortHeaders[numShortHeaders] = NULL; + break; + case DT_DocumentLongHeaderGroup: + if (longHeaders == NULL) /* create a new header list */ + { + longHeaders = S_MALLOC2(WAISDocumentLongHeader *); + } else { /* grow the doc list */ + longHeaders = S_REALLOC2(WAISDocumentLongHeader *, + longHeaders, + numLongHeaders); + } + buf = readWAISDocumentLongHeader((WAISDocumentLongHeader **) &header, + buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + longHeaders[numLongHeaders++] = + (WAISDocumentLongHeader *) header; /* put it in the list */ + longHeaders[numLongHeaders] = NULL; + break; + case DT_DocumentTextGroup: + if (text == NULL) /* create a new list */ + { + text = S_MALLOC2(WAISDocumentText *); + } else { /* grow the list */ + text = 
S_REALLOC2(WAISDocumentText *, text, numText); + } + buf = readWAISDocumentText((WAISDocumentText **) &header, buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + text[numText++] = + (WAISDocumentText *) header; /* put it in the list */ + text[numText] = NULL; + break; + case DT_DocumentHeadlineGroup: + if (headlines == NULL) /* create a new list */ + { + headlines = S_MALLOC2(WAISDocumentHeadlines *); + } else { /* grow the list */ + headlines = S_REALLOC2(WAISDocumentHeadlines *, headlines, numHeadlines); + } + buf = readWAISDocumentHeadlines((WAISDocumentHeadlines **) &header, + buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + headlines[numHeadlines++] = + (WAISDocumentHeadlines *) header; /* put it in the list */ + headlines[numHeadlines] = NULL; + break; + case DT_DocumentCodeGroup: + if (codes == NULL) /* create a new list */ + { + codes = S_MALLOC2(WAISDocumentCodes *); + } else { /* grow the list */ + codes = S_REALLOC2(WAISDocumentCodes *, codes, numCodes); + } + buf = readWAISDocumentCodes((WAISDocumentCodes **) &header, buf); + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + RETURN_ON_NULL(buf); + codes[numCodes++] = + (WAISDocumentCodes *) header; /* put it in the list */ + codes[numCodes] = NULL; + break; + default: + cleanUpWaisSearchResponse(buf, + seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + REPORT_READ_ERROR(buf); + break; + } + } + + *info = (void *) makeWAISSearchResponse(seedWordsUsed, + docHeaders, + shortHeaders, + longHeaders, + text, + headlines, + codes, + diags); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + 
+WAISDocumentText *makeWAISDocumentText(any *docID, + long versionNumber, + any *documentText) +{ + WAISDocumentText *docText = S_MALLOC(WAISDocumentText); + + docText->DocumentID = docID; + docText->VersionNumber = versionNumber; + docText->DocumentText = documentText; + + return (docText); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentText(WAISDocumentText *docText) +{ + freeAny(docText->DocumentID); + freeAny(docText->DocumentText); + s_free(docText); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentText(WAISDocumentText *docText, char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentTextGroup, + DefWAISDocTextSize); + char *buf = buffer + header_len; + unsigned long size; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(docText->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(docText->VersionNumber, DT_VersionNumber, buf, len); + buf = writeAny(docText->DocumentText, DT_DocumentText, buf, len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentTextGroup, size, header_len, buffer, len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentText(WAISDocumentText **docText, char *buffer) +{ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + data_tag tag1; + any *docID, *documentText; + long versionNumber; + + docID = documentText = NULL; + versionNumber = UNUSED; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_DocumentText: + buf = readAny(&documentText, buf); + break; + default: + 
freeAny(docID); + freeAny(documentText); + REPORT_READ_ERROR(buf); + break; + } + } + + *docText = makeWAISDocumentText(docID, versionNumber, documentText); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISDocumentHeadlines *makeWAISDocumentHeadlines(any *docID, + long versionNumber, + char *source, + char *date, + char *headline, + char *originCity) +{ + WAISDocumentHeadlines *docHeadline = S_MALLOC(WAISDocumentHeadlines); + + docHeadline->DocumentID = docID; + docHeadline->VersionNumber = versionNumber; + docHeadline->Source = source; + docHeadline->Date = date; + docHeadline->Headline = headline; + docHeadline->OriginCity = originCity; + + return (docHeadline); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentHeadlines(WAISDocumentHeadlines *docHeadline) +{ + freeAny(docHeadline->DocumentID); + s_free(docHeadline->Source); + s_free(docHeadline->Date); + s_free(docHeadline->Headline); + s_free(docHeadline->OriginCity); + s_free(docHeadline); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentHeadlines(WAISDocumentHeadlines *docHeadline, char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentHeadlineGroup, + DefWAISDocHeadlineSize); + char *buf = buffer + header_len; + unsigned long size; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(docHeadline->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(docHeadline->VersionNumber, DT_VersionNumber, buf, len); + buf = writeString(docHeadline->Source, DT_Source, buf, len); + buf = writeString(docHeadline->Date, DT_Date, buf, len); + buf = writeString(docHeadline->Headline, DT_Headline, buf, len); + buf = writeString(docHeadline->OriginCity, DT_OriginCity, buf, len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentHeadlineGroup, + size, + header_len, + buffer, 
+ len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentHeadlines(WAISDocumentHeadlines **docHeadline, char *buffer) +{ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + data_tag tag1; + any *docID; + long versionNumber; + char *source, *date, *headline, *originCity; + + docID = NULL; + versionNumber = UNUSED; + source = date = headline = originCity = NULL; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_Source: + buf = readString(&source, buf); + break; + case DT_Date: + buf = readString(&date, buf); + break; + case DT_Headline: + buf = readString(&headline, buf); + break; + case DT_OriginCity: + buf = readString(&originCity, buf); + break; + default: + freeAny(docID); + s_free(source); + s_free(date); + s_free(headline); + s_free(originCity); + REPORT_READ_ERROR(buf); + break; + } + } + + *docHeadline = makeWAISDocumentHeadlines(docID, versionNumber, source, date, + headline, originCity); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +WAISDocumentCodes *makeWAISDocumentCodes(any *docID, + long versionNumber, + char *stockCodes, + char *companyCodes, + char *industryCodes) +{ + WAISDocumentCodes *docCodes = S_MALLOC(WAISDocumentCodes); + + docCodes->DocumentID = docID; + docCodes->VersionNumber = versionNumber; + docCodes->StockCodes = stockCodes; + docCodes->CompanyCodes = companyCodes; + docCodes->IndustryCodes = industryCodes; + + return (docCodes); +} + +/*----------------------------------------------------------------------*/ + +void freeWAISDocumentCodes(WAISDocumentCodes *docCodes) +{ + freeAny(docCodes->DocumentID); + 
s_free(docCodes->StockCodes); + s_free(docCodes->CompanyCodes); + s_free(docCodes->IndustryCodes); + s_free(docCodes); +} + +/*----------------------------------------------------------------------*/ + +char *writeWAISDocumentCodes(WAISDocumentCodes *docCodes, + char *buffer, + long *len) +{ + unsigned long header_len = userInfoTagSize(DT_DocumentCodeGroup, + DefWAISDocCodeSize); + char *buf = buffer + header_len; + unsigned long size; + + RESERVE_SPACE_FOR_WAIS_HEADER(len); + + buf = writeAny(docCodes->DocumentID, DT_DocumentID, buf, len); + buf = writeNum(docCodes->VersionNumber, DT_VersionNumber, buf, len); + buf = writeString(docCodes->StockCodes, DT_StockCodes, buf, len); + buf = writeString(docCodes->CompanyCodes, DT_CompanyCodes, buf, len); + buf = writeString(docCodes->IndustryCodes, DT_IndustryCodes, buf, len); + + /* now write the header and size */ + size = buf - buffer; + buf = writeUserInfoHeader(DT_DocumentCodeGroup, size, header_len, buffer, len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readWAISDocumentCodes(WAISDocumentCodes **docCodes, + char *buffer) +{ + char *buf = buffer; + unsigned long size; + unsigned long headerSize; + data_tag tag1; + any *docID; + long versionNumber; + char *stockCodes, *companyCodes, *industryCodes; + + docID = NULL; + versionNumber = UNUSED; + stockCodes = companyCodes = industryCodes = NULL; + + buf = readUserInfoHeader(&tag1, &size, buf); + headerSize = buf - buffer; + + while (buf < (buffer + size + headerSize)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_DocumentID: + buf = readAny(&docID, buf); + break; + case DT_VersionNumber: + buf = readNum(&versionNumber, buf); + break; + case DT_StockCodes: + buf = readString(&stockCodes, buf); + break; + case DT_CompanyCodes: + buf = readString(&companyCodes, buf); + break; + case DT_IndustryCodes: + buf = readString(&industryCodes, buf); + break; + default: + freeAny(docID); + 
s_free(stockCodes); + s_free(companyCodes); + s_free(industryCodes); + REPORT_READ_ERROR(buf); + break; + } + } + + *docCodes = makeWAISDocumentCodes(docID, versionNumber, stockCodes, + companyCodes, industryCodes); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *writePresentInfo(PresentAPDU * present GCC_UNUSED, char *buffer, + long *len GCC_UNUSED) +{ + /* The WAIS protocol doesn't use present info */ + return (buffer); +} + +/*----------------------------------------------------------------------*/ + +char *readPresentInfo(void **info, + char *buffer) +{ + /* The WAIS protocol doesn't use present info */ + *info = NULL; + return (buffer); +} + +/*----------------------------------------------------------------------*/ + +char *writePresentResponseInfo(PresentResponseAPDU * response GCC_UNUSED, char *buffer, + long *len GCC_UNUSED) +{ + /* The WAIS protocol doesn't use presentResponse info */ + return (buffer); +} + +/*----------------------------------------------------------------------*/ + +char *readPresentResponseInfo(void **info, + char *buffer) +{ + /* The WAIS protocol doesn't use presentResponse info */ + *info = NULL; + return (buffer); +} + +/*----------------------------------------------------------------------*/ + +/* support for type 1 queries */ + +/* new use values (for the chunk types) */ +#define BYTE "wb" +#define LINE "wl" +#define PARAGRAPH "wp" +#define DATA_TYPE "wt" + +/* WAIS supports the following semantics for type 1 queries: + + 1. retrieve the header/codes from a document: + + System_Control_Number = docID + Data Type = type (optional) + And + + 2. 
retrieve a fragment of the text of a document: + + System_Control_Number = docID + Data Type = type (optional) + And + Chunk >= start + And + Chunk < end + And + + Information from multiple documents may be requested by using + groups of the above joined by: + + OR + + ( XXX does an OR come after every group but the first, or do they + all come at the end? ) + + ( XXX return type could be in the element set) +*/ + +static query_term **makeWAISQueryTerms(DocObj **docs) +/* given a null terminated list of docObjs, construct the appropriate + query of the form given above + */ +{ + query_term **terms = NULL; + long numTerms = 0; + DocObj *doc = NULL; + long i; + + if (docs == NULL) + return ((query_term **) NULL); + + terms = (query_term **) s_malloc((size_t) (sizeof(query_term *) * 1)); + + terms[numTerms] = NULL; + + /* loop through the docs making terms for them all */ + for (i = 0, doc = docs[i]; doc != NULL; doc = docs[++i]) { + any *type = NULL; + + if (doc->Type != NULL) + type = stringToAny(doc->Type); + + if (doc->ChunkCode == CT_document) /* a whole document */ + { + terms = S_REALLOC2(query_term *, terms, numTerms + 2); + + terms[numTerms++] = makeAttributeTerm(SYSTEM_CONTROL_NUMBER, + EQUAL, IGNORE, IGNORE, + IGNORE, IGNORE, doc->DocumentID); + if (type != NULL) { + terms[numTerms++] = makeAttributeTerm(DATA_TYPE, EQUAL, + IGNORE, IGNORE, IGNORE, + IGNORE, type); + terms[numTerms++] = makeOperatorTerm(AND); + } + terms[numTerms] = NULL; + } else { /* a document fragment */ + char chunk_att[ATTRIBUTE_SIZE]; + any *startChunk = NULL; + any *endChunk = NULL; + + terms = S_REALLOC2(query_term *, terms, numTerms + 6); + + switch (doc->ChunkCode) { + case CT_byte: + case CT_line: + { + char start[20], end[20]; + + (doc->ChunkCode == CT_byte) ? 
+ StrNCpy(chunk_att, BYTE, ATTRIBUTE_SIZE) : + StrNCpy(chunk_att, LINE, ATTRIBUTE_SIZE); + sprintf(start, "%ld", doc->ChunkStart.Pos); + startChunk = stringToAny(start); + sprintf(end, "%ld", doc->ChunkEnd.Pos); + endChunk = stringToAny(end); + } + break; + case CT_paragraph: + StrNCpy(chunk_att, PARAGRAPH, ATTRIBUTE_SIZE); + startChunk = doc->ChunkStart.ID; + endChunk = doc->ChunkEnd.ID; + break; + default: + /* error */ + break; + } + + terms[numTerms++] = makeAttributeTerm(SYSTEM_CONTROL_NUMBER, + EQUAL, IGNORE, IGNORE, + IGNORE, + IGNORE, doc->DocumentID); + if (type != NULL) { + terms[numTerms++] = makeAttributeTerm(DATA_TYPE, EQUAL, IGNORE, + IGNORE, IGNORE, IGNORE, + type); + terms[numTerms++] = makeOperatorTerm(AND); + } + terms[numTerms++] = makeAttributeTerm(chunk_att, + GREATER_THAN_OR_EQUAL, + IGNORE, IGNORE, IGNORE, + IGNORE, + startChunk); + terms[numTerms++] = makeOperatorTerm(AND); + terms[numTerms++] = makeAttributeTerm(chunk_att, LESS_THAN, + IGNORE, IGNORE, IGNORE, + IGNORE, + endChunk); + terms[numTerms++] = makeOperatorTerm(AND); + terms[numTerms] = NULL; + + if (doc->ChunkCode == CT_byte || doc->ChunkCode == CT_line) { + freeAny(startChunk); + freeAny(endChunk); + } + } + + freeAny(type); + + if (i != 0) /* multiple independent queries, need a disjunction */ + { + terms = S_REALLOC2(query_term *, terms, numTerms); + + terms[numTerms++] = makeOperatorTerm(OR); + terms[numTerms] = NULL; + } + } + + return (terms); +} + +/*----------------------------------------------------------------------*/ + +static DocObj **makeWAISQueryDocs(query_term **terms) +/* given a list of terms in the form given above, convert them to + DocObjs. 
+ */ +{ + query_term *docTerm = NULL; + query_term *fragmentTerm = NULL; + DocObj **docs = NULL; + DocObj *doc = NULL; + long docNum, termNum; + + docNum = termNum = 0; + + docs = S_MALLOC(DocObj *); + + docs[docNum] = NULL; + + /* translate the terms into DocObjs */ + while (true) { + query_term *typeTerm = NULL; + char *type = NULL; + long startTermOffset; + + docTerm = terms[termNum]; + + if (docTerm == NULL) + break; /* we're done converting */ + + typeTerm = terms[termNum + 1]; /* get the lead Term if it exists */ + + if (strcmp(typeTerm->Use, DATA_TYPE) == 0) /* we do have a type */ + { + startTermOffset = 3; + type = anyToString(typeTerm->Term); + } else { /* no type */ + startTermOffset = 1; + typeTerm = NULL; + type = NULL; + } + + /* grow the doc list */ + docs = S_REALLOC2(DocObj *, docs, docNum); + + /* figure out what kind of docObj to build - and build it */ + fragmentTerm = terms[termNum + startTermOffset]; + if (fragmentTerm != NULL && fragmentTerm->TermType == TT_Attribute) { /* build a document fragment */ + query_term *startTerm = fragmentTerm; + query_term *endTerm = terms[termNum + startTermOffset + 2]; + + if (strcmp(startTerm->Use, BYTE) == 0) { /* a byte chunk */ + doc = makeDocObjUsingBytes(duplicateAny(docTerm->Term), + type, + anyToLong(startTerm->Term), + anyToLong(endTerm->Term)); + log_write("byte"); + } else if (strcmp(startTerm->Use, LINE) == 0) { /* a line chunk */ + doc = makeDocObjUsingLines(duplicateAny(docTerm->Term), + type, + anyToLong(startTerm->Term), + anyToLong(endTerm->Term)); + log_write("line"); + } else { + log_write("chunk"); /* a paragraph chunk */ + doc = makeDocObjUsingParagraphs(duplicateAny(docTerm->Term), + type, + duplicateAny(startTerm->Term), + duplicateAny(endTerm->Term)); + } + termNum += (startTermOffset + 4); /* point to next term */ + } else { /* build a full document */ + doc = makeDocObjUsingWholeDocument(duplicateAny(docTerm->Term), + type); + log_write("whole doc"); + termNum += startTermOffset; /* 
point to next term */ + } + + docs[docNum++] = doc; /* insert the new document */ + + docs[docNum] = NULL; /* keep the doc list terminated */ + + if (terms[termNum] != NULL) + termNum++; /* skip the OR operator it necessary */ + else + break; /* we are done */ + } + + return (docs); +} + +/*----------------------------------------------------------------------*/ + +any *makeWAISTextQuery(DocObj **docs) +/* given a list of DocObjs, return an any whose contents is the corresponding + type 1 query + */ +{ + any *buf = NULL; + query_term **terms = NULL; + + terms = makeWAISQueryTerms(docs); + buf = writeQuery(terms); + + doList((void **) terms, freeTerm); + s_free(terms); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +DocObj **readWAISTextQuery(any *buf) +/* given an any whose contents are type 1 queries of the WAIS sort, + construct a list of the corresponding DocObjs + */ +{ + query_term **terms = NULL; + DocObj **docs = NULL; + + terms = readQuery(buf); + docs = makeWAISQueryDocs(terms); + + doList((void **) terms, freeTerm); + s_free(terms); + + return (docs); +} + +/*----------------------------------------------------------------------*/ +/* Customized free WAIS object routines: */ +/* */ +/* This set of procedures is for applications to free a WAIS object */ +/* which was made with makeWAISFOO. */ +/* Each procedure frees only the memory that was allocated in its */ +/* associated makeWAISFOO routine, thus it's not necessary for the */ +/* caller to assign nulls to the pointer fields of the WAIS object. 
*/ +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISInitResponse(WAISInitResponse *init) +/* free an object made with makeWAISInitResponse */ +{ + s_free(init); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISSearch(WAISSearch *query) +/* destroy an object made with makeWAISSearch() */ +{ + s_free(query); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeDocObj(DocObj *doc) +/* free a docObj */ +{ + s_free(doc); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocumentHeader(WAISDocumentHeader *header) +{ + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocumentShortHeader(WAISDocumentShortHeader *header) +{ + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocumentLongHeader(WAISDocumentLongHeader *header) +{ + s_free(header); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISSearchResponse(WAISSearchResponse * response) +{ + s_free(response); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocumentText(WAISDocumentText *docText) +{ + s_free(docText); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocHeadlines(WAISDocumentHeadlines *docHeadline) +{ + s_free(docHeadline); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISDocumentCodes(WAISDocumentCodes *docCodes) +{ + s_free(docCodes); +} + +/*----------------------------------------------------------------------*/ + +void CSTFreeWAISTextQuery(any *query) +{ + freeAny(query); +} + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally 
from WMessage.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * 3.26.90 + */ + +/* This file is for reading and writing the wais packet header. + * Morris@think.com + */ + +/* to do: + * add check sum + * what do you do when checksum is wrong? + */ + +/*---------------------------------------------------------------------*/ + +void readWAISPacketHeader(char *msgBuffer, + WAISMessage * header_struct) +{ + /* msgBuffer is a string containing at least HEADER_LENGTH bytes. */ + + memmove(header_struct->msg_len, msgBuffer, (size_t) 10); + header_struct->msg_type = char_downcase((unsigned long) msgBuffer[10]); + header_struct->hdr_vers = char_downcase((unsigned long) msgBuffer[11]); + memmove(header_struct->server, (void *) (msgBuffer + 12), (size_t) 10); + header_struct->compression = char_downcase((unsigned long) msgBuffer[22]); + header_struct->encoding = char_downcase((unsigned long) msgBuffer[23]); + header_struct->msg_checksum = char_downcase((unsigned long) msgBuffer[24]); +} + +/*---------------------------------------------------------------------*/ + +/* this modifies the header argument. See wais-message.h for the different + * options for the arguments. + */ + +void writeWAISPacketHeader(char *header, + long dataLen, + long type, + char *server, + long compression, + long encoding, + long version) +/* Puts together the new wais before-the-z39-packet header. 
*/ +{ + char lengthBuf[11]; + char serverBuf[11]; + + long serverLen = strlen(server); + + if (serverLen > 10) + serverLen = 10; + + sprintf(lengthBuf, "%010ld", dataLen); + StrNCpy(header, lengthBuf, 10); + + header[10] = type & 0xFF; + header[11] = version & 0xFF; + + StrNCpy(serverBuf, server, serverLen); + StrNCpy((char *) (header + 12), serverBuf, serverLen); + + header[22] = compression & 0xFF; + header[23] = encoding & 0xFF; + header[24] = '0'; /* checkSum(header + HEADER_LENGTH,dataLen); XXX the result must be ascii */ +} + +/*---------------------------------------------------------------------*/ diff --git a/WWW/Library/Implementation/HTVMS_WaisProt.h b/WWW/Library/Implementation/HTVMS_WaisProt.h new file mode 100644 index 00000000..457d3cbb --- /dev/null +++ b/WWW/Library/Implementation/HTVMS_WaisProt.h @@ -0,0 +1,425 @@ +/* HTVMS_WAISProt.h + * + * Adaptation for Lynx by F.Macrides (macrides@sci.wfeb.edu) + * + * 31-May-1994 FM Initial version. + * + *----------------------------------------------------------------------*/ + +/* + * Routines originally from WProt.h -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * + * 3.26.90 Harry Morris, morris@think.com + * 3.30.90 Harry Morris + * - removed chunk code from WAISSearchAPDU, + * - added makeWAISQueryType1Query() and readWAISType1Query() which + * replace makeWAISQueryTerms() and makeWAISQueryDocs(). 
+ * 4.11.90 HWM - added definitions of wais element set names + * 4.14.90 HWM - changed symbol for relevance feedback query from QT_3 to + * QT_RelevanceFeedbackQuery added QT_TextRetrievalQuery as a + * synonym for QT_BooleanQuery + * - renamed makeWAISType1Query() to makeWAISTextQuery() + * renamed readWAISType1Query() to readWAISTextQuery() + * 5.29.90 TS - added CSTFreeWAISFoo functions + */ + +#ifndef _H_WAIS_protocol_ +#define _H_WAIS_protocol_ + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#include <HTVMS_WaisUI.h> + +/*----------------------------------------------------------------------*/ +/* Data types / constants */ + +/* date factor constants */ +#define DF_INDEPENDENT 1 +#define DF_LATER 2 +#define DF_EARLIER 3 +#define DF_SPECIFIED_RANGE 4 + +/* chunk types */ +#define CT_document 0 +#define CT_byte 1 +#define CT_line 2 +#define CT_paragraph 3 + +/* relevance feedback query */ +#define QT_RelevanceFeedbackQuery "3" +#define QT_TextRetrievalQuery QT_BooleanQuery + +/* new data tags */ +#define DT_UserInformationLength (data_tag)99 +#define DT_ChunkCode (data_tag)100 +#define DT_ChunkIDLength (data_tag)101 +#define DT_ChunkMarker (data_tag)102 +#define DT_HighlightMarker (data_tag)103 +#define DT_DeHighlightMarker (data_tag)104 +#define DT_NewlineCharacters (data_tag)105 +#define DT_SeedWords (data_tag)106 +#define DT_DocumentIDChunk (data_tag)107 +#define DT_ChunkStartID (data_tag)108 +#define DT_ChunkEndID (data_tag)109 +#define DT_TextList (data_tag)110 +#define DT_DateFactor (data_tag)111 +#define DT_BeginDateRange (data_tag)112 +#define DT_EndDateRange (data_tag)113 +#define DT_MaxDocumentsRetrieved (data_tag)114 +#define DT_SeedWordsUsed (data_tag)115 +#define DT_DocumentID (data_tag)116 +#define DT_VersionNumber (data_tag)117 +#define DT_Score (data_tag)118 +#define DT_BestMatch (data_tag)119 +#define DT_DocumentLength (data_tag)120 +#define DT_Source (data_tag)121 +#define DT_Date (data_tag)122 +#define DT_Headline (data_tag)123 +#define 
DT_OriginCity (data_tag)124 +#define DT_PresentStartByte (data_tag)125 +#define DT_TextLength (data_tag)126 +#define DT_DocumentText (data_tag)127 +#define DT_StockCodes (data_tag)128 +#define DT_CompanyCodes (data_tag)129 +#define DT_IndustryCodes (data_tag)130 + +/* added by harry */ +#define DT_DocumentHeaderGroup (data_tag)150 +#define DT_DocumentShortHeaderGroup (data_tag)151 +#define DT_DocumentLongHeaderGroup (data_tag)152 +#define DT_DocumentTextGroup (data_tag)153 +#define DT_DocumentHeadlineGroup (data_tag)154 +#define DT_DocumentCodeGroup (data_tag)155 +#define DT_Lines (data_tag)131 +#define DT_TYPE_BLOCK (data_tag)132 +#define DT_TYPE (data_tag)133 + +/* wais element sets */ +#define ES_DocumentHeader "Document Header" +#define ES_DocumentShortHeader "Document Short Header" +#define ES_DocumentLongHeader "Document Long Header" +#define ES_DocumentText "Document Text" +#define ES_DocumentHeadline "Document Headline" +#define ES_DocumentCodes "Document Codes" + +typedef struct DocObj { /* specifies a section of a document */ + any *DocumentID; + char *Type; + long ChunkCode; + union { + long Pos; + any *ID; + } ChunkStart; + union { + long Pos; + any *ID; + } ChunkEnd; +} DocObj; + +/*----------------------------------------------------------------------*/ +/* WAIS APDU extensions */ + +typedef struct WAISInitResponse { + long ChunkCode; + long ChunkIDLength; + char *ChunkMarker; + char *HighlightMarker; + char *DeHighlightMarker; + char *NewlineCharacters; + /* XXX need to add UpdateFrequency and Update Time */ +} WAISInitResponse; + +typedef struct WAISSearch { + char *SeedWords; + DocObj **Docs; + char **TextList; + long DateFactor; + char *BeginDateRange; + char *EndDateRange; + long MaxDocumentsRetrieved; +} WAISSearch; + +typedef struct WAISDocumentHeader { + any *DocumentID; + long VersionNumber; + long Score; + long BestMatch; + long DocumentLength; + long Lines; + char **Types; + char *Source; + char *Date; + char *Headline; + char *OriginCity; 
+} WAISDocumentHeader; + +typedef struct WAISDocumentShortHeader { + any *DocumentID; + long VersionNumber; + long Score; + long BestMatch; + long DocumentLength; + long Lines; +} WAISDocumentShortHeader; + +typedef struct WAISDocumentLongHeader { + any *DocumentID; + long VersionNumber; + long Score; + long BestMatch; + long DocumentLength; + long Lines; + char **Types; + char *Source; + char *Date; + char *Headline; + char *OriginCity; + char *StockCodes; + char *CompanyCodes; + char *IndustryCodes; +} WAISDocumentLongHeader; + +typedef struct WAISDocumentText { + any *DocumentID; + long VersionNumber; + any *DocumentText; +} WAISDocumentText; + +typedef struct WAISDocumentHeadlines { + any *DocumentID; + long VersionNumber; + char *Source; + char *Date; + char *Headline; + char *OriginCity; +} WAISDocumentHeadlines; + +typedef struct WAISDocumentCodes { + any *DocumentID; + long VersionNumber; + char *StockCodes; + char *CompanyCodes; + char *IndustryCodes; +} WAISDocumentCodes; + +typedef struct WAISSearchResponse { + char *SeedWordsUsed; + WAISDocumentHeader **DocHeaders; + WAISDocumentShortHeader **ShortHeaders; + WAISDocumentLongHeader **LongHeaders; + WAISDocumentText **Text; + WAISDocumentHeadlines **Headlines; + WAISDocumentCodes **Codes; + diagnosticRecord **Diagnostics; +} WAISSearchResponse; + +/*----------------------------------------------------------------------*/ +/* Functions */ + +char *generate_search_apdu(char *buff, /* buffer to hold the apdu */ + long *buff_len, /* number of bytes written to the buffer */ + char *seed_words, /* string of the seed words */ + char *database_name, + DocObj **docobjs, + long maxDocsRetrieved); + +DocObj *makeDocObjUsingWholeDocument(any *aDocID, char *type); +DocObj *makeDocObjUsingBytes(any *aDocID, char *type, long start, long end); +DocObj *makeDocObjUsingLines(any *aDocID, char *type, long start, long end); +DocObj *makeDocObjUsingParagraphs(any *aDocID, char *type, any *start, any *end); +void 
freeDocObj(DocObj *doc); + +WAISInitResponse *makeWAISInitResponse(long chunkCode, long chunkIDLen, + char *chunkMarker, char *highlightMarker, + char *deHighlightMarker, char *newLineChars); +void freeWAISInitResponse(WAISInitResponse *init); + +WAISSearch *makeWAISSearch(char *seedWords, + DocObj **docs, + char **textList, + long dateFactor, + char *beginDateRange, + char *endDateRange, + long maxDocsRetrieved); +void freeWAISSearch(WAISSearch *query); + +WAISDocumentHeader *makeWAISDocumentHeader(any *aDocID, + long versionNumber, + long score, + long bestMatch, + long docLen, + long lines, + char **types, + char *source, + char *date, + char *headline, + char *originCity); +void freeWAISDocumentHeader(WAISDocumentHeader *header); +char *writeWAISDocumentHeader(WAISDocumentHeader *header, char *buffer, long *len); +char *readWAISDocumentHeader(WAISDocumentHeader **header, char *buffer); + +WAISDocumentShortHeader *makeWAISDocumentShortHeader(any *aDocID, + long versionNumber, + long score, + long bestMatch, + long docLen, + long lines); +void freeWAISDocumentShortHeader(WAISDocumentShortHeader *header); +char *writeWAISDocumentShortHeader(WAISDocumentShortHeader *header, + char *buffer, long *len); +char *readWAISDocumentShortHeader(WAISDocumentShortHeader **header, char *buffer); + +WAISDocumentLongHeader *makeWAISDocumentLongHeader(any *aDocID, + long versionNumber, + long score, + long bestMatch, + long docLen, + long lines, + char **types, + char *source, + char *date, char *headline, + char *originCity, + char *stockCodes, + char *companyCodes, + char *industryCodes); +void freeWAISDocumentLongHeader(WAISDocumentLongHeader *header); +char *writeWAISDocumentLongHeader(WAISDocumentLongHeader *header, + char *buffer, + long *len); +char *readWAISDocumentLongHeader(WAISDocumentLongHeader **header, char *buffer); + +WAISSearchResponse *makeWAISSearchResponse(char *seedWordsUsed, + WAISDocumentHeader **docHeaders, + WAISDocumentShortHeader **shortHeaders, + 
WAISDocumentLongHeader **longHeaders, + WAISDocumentText **text, WAISDocumentHeadlines **headlines, + WAISDocumentCodes **codes, + diagnosticRecord ** diagnostics); +void freeWAISSearchResponse(WAISSearchResponse * response); + +WAISDocumentText *makeWAISDocumentText(any *aDocID, long versionNumber, + any *documentText); +void freeWAISDocumentText(WAISDocumentText *docText); +char *writeWAISDocumentText(WAISDocumentText *docText, char *buffer, long *len); +char *readWAISDocumentText(WAISDocumentText **docText, char *buffer); + +WAISDocumentHeadlines *makeWAISDocumentHeadlines(any *aDocID, + long versionNumber, + char *source, + char *date, + char *headline, + char *originCity); +void freeWAISDocumentHeadlines(WAISDocumentHeadlines *docHeadline); +char *writeWAISDocumentHeadlines(WAISDocumentHeadlines *docHeadline, + char *buffer, + long *len); +char *readWAISDocumentHeadlines(WAISDocumentHeadlines **docHeadline, char *buffer); + +WAISDocumentCodes *makeWAISDocumentCodes(any *aDocID, + long versionNumber, + char *stockCodes, + char *companyCodes, + char *industryCodes); +void freeWAISDocumentCodes(WAISDocumentCodes *docCodes); +char *writeWAISDocumentCodes(WAISDocumentCodes *docCodes, char *buffer, long *len); +char *readWAISDocumentCodes(WAISDocumentCodes **docCodes, char *buffer); + +any *makeWAISTextQuery(DocObj **docs); +DocObj **readWAISTextQuery(any *terms); + +void CSTFreeWAISInitResponse(WAISInitResponse *init); +void CSTFreeWAISSearch(WAISSearch *query); +void CSTFreeDocObj(DocObj *doc); +void CSTFreeWAISDocumentHeader(WAISDocumentHeader *header); +void CSTFreeWAISDocumentShortHeader(WAISDocumentShortHeader *header); +void CSTFreeWAISDocumentLongHeader(WAISDocumentLongHeader *header); +void CSTFreeWAISSearchResponse(WAISSearchResponse * response); +void CSTFreeWAISDocumentText(WAISDocumentText *docText); +void CSTFreeWAISDocHeadlines(WAISDocumentHeadlines *docHeadline); +void CSTFreeWAISDocumentCodes(WAISDocumentCodes *docCodes); +void 
CSTFreeWAISTextQuery(any *query); + +/*----------------------------------------------------------------------*/ + +#endif /* ndef _H_WAIS_protocol_ */ + +/* + * Routines originally from WMessage.h -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * 3.26.90 + */ + +/* wais-message.h + * + * This is the header outside of WAIS Z39.50 messages. The header will be + * printable ascii, so as to be transportable. This header will precede each + * Z39.50 APDU, or zero-length message if it is an ACK or NACK. Be sure to + * change hdr_vers current value if you change the structure of the header. + * + * The characters in the header are case insensitive so that the systems from + * the past that only handle one case can at least read the header. + * + * 7.5.90 HWM - added constants + * 7/5/90 brewster added funtion prototypes and comments + * 11/30/90 HWM - went to version 2 (inits and typed retrieval) + */ + +#ifndef WMESSAGE_H +#define WMESSAGE_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#include <HTVMS_WaisUI.h> + +typedef struct wais_header { + char msg_len[10]; /* length in bytes of following message */ + char msg_type; /* type of message: 'z'=Z39.50 APDU, + 'a'=ACK, 'n'=NACK */ + char hdr_vers; /* version of this header, currently = '2' */ + char server[10]; /* name or address of server */ + char compression; /* <sp>=no compression, 'u'=unix compress */ + char encoding; /* <sp>=no encoding, 'h'=hexize, + 'u'=uuencode */ + char msg_checksum; /* XOR of every byte of message */ +} WAISMessage; + +#define HEADER_LENGTH 25 /* number of bytes needed to write a + wais-header (not sizeof(wais_header)) */ + +#define HEADER_VERSION (long)'2' + +/* message type */ +#define Z3950 'z' +#define ACK 'a' +#define NAK 'n' + +/* compression */ +#define NO_COMPRESSION ' ' +#define UNIX_COMPRESSION 'u' + +/* 
encoding */ +#define NO_ENCODING ' ' +#define HEX_ENCODING 'h' /* Swartz 4/3 encoding */ +#define IBM_HEXCODING 'i' /* same as h but uses characters acceptable for IBM mainframes */ +#define UUENCODE 'u' + +void readWAISPacketHeader(char *msgBuffer, WAISMessage * header_struct); +long getWAISPacketLength(WAISMessage * header); +void writeWAISPacketHeader(char *header, long dataLen, long type, + char *server, long compression, + long encoding, long version); + +#endif /* ndef WMESSAGE_H */ diff --git a/WWW/Library/Implementation/HTVMS_WaisUI.c b/WWW/Library/Implementation/HTVMS_WaisUI.c new file mode 100644 index 00000000..d8f73019 --- /dev/null +++ b/WWW/Library/Implementation/HTVMS_WaisUI.c @@ -0,0 +1,2280 @@ +/* + * $LynxId: HTVMS_WaisUI.c,v 1.17 2010/10/29 21:10:14 tom Exp $ + * HTVMS_WAISUI.c + * + * Adaptation for Lynx by F.Macrides (macrides@sci.wfeb.edu) + * + * 30-May-1994 FM Initial version. + * + *----------------------------------------------------------------------*/ + +/* + * Routines originally from UI.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * + * Brewster@think.com + */ + +/* + * this is a simple ui toolkit for building other ui's on top. + * -brewster + * + * top level functions: + * generate_search_apdu + * generate_retrieval_apdu + * interpret_message + * + */ + +/* to do: + * generate multiple queries for long documents. + * this will crash if the file being retrieved is larger than 100k. 
+ * do log_write() + * + */ + +#include <HTUtils.h> + +#ifdef VMS +#include <HTVMS_WaisUI.h> +#include <HTVMS_WaisProt.h> +#include <HTTCP.h> + +#undef MAXINT /* we don't need it here, and www_tcp.h may conflict */ +#include <math.h> + +#include <LYexit.h> +#include <LYLeaks.h> + +void log_write(char *s GCC_UNUSED) +{ + return; +} + +/*----------------------------------------------------------------------*/ + +/* returns a pointer in the buffer of the first free byte. + if it overflows, then NULL is returned + */ +char *generate_search_apdu(char *buff, /* buffer to hold the apdu */ + long *buff_len, /* length of the buffer changed to reflect new data written */ + char *seed_words, /* string of the seed words */ + char *database_name, + DocObj **docobjs, + long maxDocsRetrieved) +{ + /* local variables */ + + SearchAPDU *search3; + char *end_ptr; + static char *database_names[2] = + {"", 0}; + any refID; + WAISSearch *query; + + refID.size = 1; + refID.bytes = "3"; + + database_names[0] = database_name; + query = makeWAISSearch(seed_words, + docobjs, /* DocObjsPtr */ + 0, + 1, /* DateFactor */ + 0, /* BeginDateRange */ + 0, /* EndDateRange */ + maxDocsRetrieved + ); + + search3 = makeSearchAPDU(30, + 5000, /* should be large */ + 30, + 1, /* replace indicator */ + "", /* result set name */ + database_names, /* database name */ + QT_RelevanceFeedbackQuery, /* query_type */ + 0, /* element name */ + NULL, /* reference ID */ + query); + + end_ptr = writeSearchAPDU(search3, buff, buff_len); + + CSTFreeWAISSearch(query); + freeSearchAPDU(search3); + return (end_ptr); +} + +/*----------------------------------------------------------------------*/ + +/* returns a pointer into the buffer of the next free byte. 
+ if it overflowed, then NULL is returned + */ + +char *generate_retrieval_apdu(char *buff, + long *buff_len, /* length of the buffer changed to reflect new data written */ + any *docID, + long chunk_type, + long start, + long end, + char *type, + char *database_name) +{ + SearchAPDU *search; + char *end_ptr; + + static char *database_names[2]; + static char *element_names[3]; + any refID; + + DocObj *DocObjs[2]; + any *query; /* changed from char* by brewster */ + + if (NULL == type) + type = s_strdup("TEXT"); + + database_names[0] = database_name; + database_names[1] = NULL; + + element_names[0] = " "; + element_names[1] = ES_DocumentText; + element_names[2] = NULL; + + refID.size = 1; + refID.bytes = "3"; + + switch (chunk_type) { + case CT_line: + DocObjs[0] = makeDocObjUsingLines(docID, type, start, end); + break; + case CT_byte: + DocObjs[0] = makeDocObjUsingBytes(docID, type, start, end); + break; + } + DocObjs[1] = NULL; + + query = makeWAISTextQuery(DocObjs); + search = makeSearchAPDU(10, 16, 15, + 1, /* replace indicator */ + "FOO", /* result set name */ + database_names, /* database name */ + QT_TextRetrievalQuery, /* query_type */ + element_names, /* element name */ + &refID, /* reference ID */ + query); + end_ptr = writeSearchAPDU(search, buff, buff_len); + CSTFreeWAISTextQuery(query); + freeSearchAPDU(search); + return (end_ptr); +} + +/*----------------------------------------------------------------------*/ + +/* this is a safe version of unix 'read' it does all the checking + * and looping necessary + * to those trying to modify the transport code to use non-UNIX streams: + * This is the function to modify! 
+ */ +static long read_from_stream(int d, char *buf, long nbytes) +{ + long didRead; + long toRead = nbytes; + long totalRead = 0; /* paranoia */ + + while (toRead > 0) { + didRead = NETREAD(d, buf, (int) toRead); + if (didRead == HT_INTERRUPTED) + return (HT_INTERRUPTED); + if (didRead == -1) /* error */ + return (-1); + if (didRead == 0) /* eof */ + return (-2); /* maybe this should return 0? */ + toRead -= didRead; + buf += didRead; + totalRead += didRead; + } + if (totalRead != nbytes) /* we overread for some reason */ + return (-totalRead); /* bad news */ + return (totalRead); +} + +/*----------------------------------------------------------------------*/ + +/* returns the length of the response, 0 if an error */ + +static long transport_message(long connection, + char *request_message, + long request_length, + char *response_message, + long response_buffer_length) +{ + WAISMessage header; + long response_length; + int rv; + + /* Write out message. Read back header. Figure out response length. 
*/ + + if (request_length + HEADER_LENGTH != + NETWRITE(connection, request_message, + (int) (request_length + HEADER_LENGTH))) + return 0; + + /* read for the first '0' */ + + while (1) { + rv = read_from_stream(connection, response_message, 1); + if (rv == HT_INTERRUPTED) + return HT_INTERRUPTED; + if (rv < 0) + return 0; + if ('0' == response_message[0]) + break; + } + + rv = read_from_stream(connection, response_message + 1, HEADER_LENGTH - 1); + if (rv == HT_INTERRUPTED) + return HT_INTERRUPTED; + if (rv < 0) + return 0; + + readWAISPacketHeader(response_message, &header); + { + char length_array[11]; + + StrNCpy(length_array, header.msg_len, 10); + length_array[10] = '\0'; + response_length = atol(length_array); + /* + if(verbose){ + printf("WAIS header: '%s' length_array: '%s'\n", + response_message, length_array); + } + */ + if (response_length > response_buffer_length) { + /* we got a message that is too long, therefore empty the message out, + and return 0 */ + long i; + + for (i = 0; i < response_length; i++) { + rv = read_from_stream(connection, + response_message + HEADER_LENGTH, + 1); + if (rv == HT_INTERRUPTED) + return HT_INTERRUPTED; + if (rv < 0) + return 0; + } + return (0); + } + } + rv = read_from_stream(connection, + response_message + HEADER_LENGTH, + response_length); + if (rv == HT_INTERRUPTED) + return HT_INTERRUPTED; + if (rv < 0) + return 0; + return (response_length); +} + +/*----------------------------------------------------------------------*/ + +/* returns the number of bytes written. 0 if an error */ +long interpret_message(char *request_message, + long request_length, /* length of the buffer */ + char *response_message, + long response_buffer_length, + long connection, + boolean verbose GCC_UNUSED) +{ + long response_length; + + /* ? 
+ if(verbose){ + printf ("sending"); + if(hostname_internal && strlen(hostname_internal) > 0) + printf(" to host %s", hostname_internal); + if(service_name && strlen(service_name) > 0) + printf(" for service %s", service_name); + printf("\n"); + twais_dsply_rsp_apdu(request_message + HEADER_LENGTH, + request_length); + } + + */ + + writeWAISPacketHeader(request_message, + request_length, + (long) 'z', /* Z39.50 */ + "wais ", /* server name */ + (long) NO_COMPRESSION, /* no compression */ + (long) NO_ENCODING, (long) HEADER_VERSION); + if (connection != 0) { + response_length = transport_message(connection, request_message, + request_length, + response_message, + response_buffer_length); + if (response_length == HT_INTERRUPTED) + return (HT_INTERRUPTED); + } else + return (0); + + return (response_length); +} + +/*----------------------------------------------------------------------*/ + +/* modifies the string to exclude all seeker codes. sets length to + the new length. */ +static char *delete_seeker_codes(char *string, long *length) +{ + long original_count; /* index into the original string */ + long new_count = 0; /* index into the collapsed string */ + + for (original_count = 0; original_count < *length; original_count++) { + if (27 == string[original_count]) { + /* then we have an escape code */ + /* if the next letter is '(' or ')', then ignore two letters */ + if ('(' == string[original_count + 1] || + ')' == string[original_count + 1]) + original_count += 1; /* it is a term marker */ + else + original_count += 4; /* it is a paragraph marker */ + } else + string[new_count++] = string[original_count]; + } + *length = new_count; + return (string); +} + +/*----------------------------------------------------------------------*/ + +#if defined(VMS) && defined(__GNUC__) /* 10-AUG-1995 [pr] */ +/* + Workaround for an obscure bug in gcc's 2.6.[123] and 2.7.0 vax/vms port; + sometimes global variables will end up not being defined properly, + causing first gas to 
assume they're routines, then the linker to complain + about unresolved symbols, and finally the program to reference the wrong + objects (provoking ACCVIO). It's triggered by the specific ordering of + variable usage in the source code, hence rarely appears. This bug is + fixed in gcc 2.7.1, and was not present in 2.6.0 and earlier. + + Make a reference to VAXCRTL's _ctype_[], and also one to this dummy + variable itself to prevent any "defined but not used" warning. + */ +static __const void *__const ctype_dummy[] = +{&_ctype_, &ctype_dummy}; +#endif /* VMS && __GNUC__ */ + +/* returns a pointer to a string with good stuff */ +char *trim_junk(char *headline) +{ + long length = strlen(headline) + 1; /* include the trailing null */ + size_t i; + + headline = delete_seeker_codes(headline, &length); + /* delete leading spaces */ + for (i = 0; i < strlen(headline); i++) { + if (isprint(headline[i])) { + break; + } + } + headline = headline + i; + /* delete trailing stuff */ + for (i = strlen(headline) - 1; i > 0; i--) { + if (isprint(headline[i])) { + break; + } + headline[i] = '\0'; + } + return (headline); +} + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from ZProt.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE:` + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. 
+ * + * 3.26.90 Harry Morris, morris@think.com + * 3.30.90 Harry Morris - Changed any->bits to any->bytes + * 4.11.90 HWM - generalized conditional includes (see c-dialect.h) + */ + +#define RESERVE_SPACE_FOR_HEADER(spaceLeft) \ + *spaceLeft -= HEADER_LEN; + +#define RELEASE_HEADER_SPACE(spaceLeft) \ + if (*spaceLeft > 0) \ + *spaceLeft += HEADER_LEN; + +/*----------------------------------------------------------------------*/ + +InitResponseAPDU *makeInitResponseAPDU(boolean result, + boolean search, + boolean present, + boolean deleteIt, + boolean accessControl, + boolean resourceControl, + long prefSize, + long maxMsgSize, + char *auth, + char *id, + char *name, + char *version, + any *refID, + void *userInfo) +/* build an initResponse APDU with user specified information */ +{ + InitResponseAPDU *init = (InitResponseAPDU *) s_malloc((size_t) sizeof(InitResponseAPDU)); + + init->PDUType = initResponseAPDU; + init->Result = result; + init->willSearch = search; + init->willPresent = present; + init->willDelete = deleteIt; + init->supportAccessControl = accessControl; + init->supportResourceControl = resourceControl; + init->PreferredMessageSize = prefSize; + init->MaximumRecordSize = maxMsgSize; + init->IDAuthentication = s_strdup(auth); + init->ImplementationID = s_strdup(id); + init->ImplementationName = s_strdup(name); + init->ImplementationVersion = s_strdup(version); + init->ReferenceID = duplicateAny(refID); + init->UserInformationField = userInfo; /* not copied! 
*/ + + return (init); +} + +/*----------------------------------------------------------------------*/ + +void freeInitResponseAPDU(InitResponseAPDU *init) +/* free an initAPDU */ +{ + s_free(init->IDAuthentication); + s_free(init->ImplementationID); + s_free(init->ImplementationName); + s_free(init->ImplementationVersion); + freeAny(init->ReferenceID); + s_free(init); +} + +/*----------------------------------------------------------------------*/ + +char *writeInitResponseAPDU(InitResponseAPDU *init, char *buffer, long *len) +/* write the initResponse to a buffer, adding system information */ +{ + char *buf = buffer + HEADER_LEN; /* leave room for the header-length-indicator */ + long size; + bit_map *optionsBM = NULL; + + RESERVE_SPACE_FOR_HEADER(len); + + buf = writePDUType(init->PDUType, buf, len); + buf = writeBoolean(init->Result, buf, len); + buf = writeProtocolVersion(buf, len); + + optionsBM = makeBitMap((unsigned long) 5, init->willSearch, init->willPresent, + init->willDelete, init->supportAccessControl, + init->supportResourceControl); + buf = writeBitMap(optionsBM, DT_Options, buf, len); + freeBitMap(optionsBM); + + buf = writeNum(init->PreferredMessageSize, + DT_PreferredMessageSize, + buf, + len); + buf = writeNum(init->MaximumRecordSize, + DT_MaximumRecordSize, + buf, + len); + buf = writeString(init->IDAuthentication, + DT_IDAuthentication, + buf, + len); + buf = writeString(init->ImplementationID, + DT_ImplementationID, + buf, + len); + buf = writeString(init->ImplementationName, + DT_ImplementationName, + buf, + len); + buf = writeString(init->ImplementationVersion, + DT_ImplementationVersion, + buf, + len); + buf = writeAny(init->ReferenceID, + DT_ReferenceID, + buf, + len); + + /* go back and write the header-length-indicator */ + RELEASE_HEADER_SPACE(len); + size = buf - buffer - HEADER_LEN; + writeBinaryInteger(size, HEADER_LEN, buffer, len); + + if (init->UserInformationField != NULL) + buf = writeInitResponseInfo(init, buf, len); + + 
return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readInitResponseAPDU(InitResponseAPDU **init, char *buffer) +{ + char *buf = buffer; + boolean search, present, delete, accessControl, resourceControl; + long prefSize, maxMsgSize; + char *auth, *id, *name, *version; + long size; + pdu_type pduType; + bit_map *versionBM = NULL; + bit_map *optionsBM = NULL; + boolean result; + any *refID = NULL; + void *userInfo = NULL; + + auth = id = name = version = NULL; + refID = NULL; + + /* read required part */ + buf = readBinaryInteger(&size, HEADER_LEN, buf); + buf = readPDUType(&pduType, buf); + buf = readBoolean(&result, buf); + buf = readBitMap(&versionBM, buf); + buf = readBitMap(&optionsBM, buf); + buf = readNum(&prefSize, buf); + buf = readNum(&maxMsgSize, buf); + + /* decode optionsBM */ + search = bitAtPos(0, optionsBM); + present = bitAtPos(1, optionsBM); + delete = bitAtPos(2, optionsBM); + accessControl = bitAtPos(3, optionsBM); + resourceControl = bitAtPos(4, optionsBM); + + /* read optional part */ + while (buf < (buffer + size + HEADER_LEN)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_IDAuthentication: + buf = readString(&auth, buf); + break; + case DT_ImplementationID: + buf = readString(&id, buf); + break; + case DT_ImplementationName: + buf = readString(&name, buf); + break; + case DT_ImplementationVersion: + buf = readString(&version, buf); + break; + case DT_ReferenceID: + buf = readAny(&refID, buf); + break; + default: + freeBitMap(versionBM); + freeBitMap(optionsBM); + s_free(auth); + s_free(id); + s_free(name); + s_free(version); + freeAny(refID); + REPORT_READ_ERROR(buf); + break; + } + } + + buf = readInitResponseInfo(&userInfo, buf); + if (buf == NULL) { + freeBitMap(versionBM); + freeBitMap(optionsBM); + s_free(auth); + s_free(id); + s_free(name); + s_free(version); + freeAny(refID); + } + RETURN_ON_NULL(buf); + + /* construct the basic init object */ + *init = 
makeInitResponseAPDU(result, + search, + present, + delete, + accessControl, + resourceControl, + prefSize, + maxMsgSize, + auth, + id, + name, + version, + refID, + userInfo); + + freeBitMap(versionBM); + freeBitMap(optionsBM); + s_free(auth); + s_free(id); + s_free(name); + s_free(version); + freeAny(refID); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +InitResponseAPDU *replyToInitAPDU(InitAPDU * init, boolean result, void *userInfo) +/* respond to an init message in the default way - echoing back + the init info + */ +{ + InitResponseAPDU *initResp; + + initResp = makeInitResponseAPDU(result, + init->willSearch, + init->willPresent, + init->willDelete, + init->supportAccessControl, + init->supportResourceControl, + init->PreferredMessageSize, + init->MaximumRecordSize, + init->IDAuthentication, + defaultImplementationID(), + defaultImplementationName(), + defaultImplementationVersion(), + init->ReferenceID, + userInfo); + return (initResp); +} + +/*----------------------------------------------------------------------*/ + +SearchAPDU *makeSearchAPDU(long small, + long large, + long medium, + boolean replace, + char *name, + char **databases, + char *type, + char **elements, + any *refID, + void *queryInfo) +{ + char *ptr = NULL; + long i; + SearchAPDU *query = (SearchAPDU *) s_malloc((size_t) sizeof(SearchAPDU)); + + query->PDUType = searchAPDU; + query->SmallSetUpperBound = small; + query->LargeSetLowerBound = large; + query->MediumSetPresentNumber = medium; + query->ReplaceIndicator = replace; + query->ResultSetName = s_strdup(name); + query->DatabaseNames = NULL; + if (databases != NULL) { + for (i = 0, ptr = databases[i]; ptr != NULL; ptr = databases[++i]) { + if (query->DatabaseNames == NULL) + query->DatabaseNames = (char **) s_malloc((size_t) (sizeof(char + *) + * 2)); + + else + query->DatabaseNames = (char **) s_realloc((char *) query->DatabaseNames, + (size_t) (sizeof(char + *) * + (i + 2))); + + 
query->DatabaseNames[i] = s_strdup(ptr); + query->DatabaseNames[i + 1] = NULL; + } + } + query->QueryType = s_strdup(type); + query->ElementSetNames = NULL; + if (elements != NULL) { + for (i = 0, ptr = elements[i]; ptr != NULL; ptr = elements[++i]) { + if (query->ElementSetNames == NULL) + query->ElementSetNames = + (char **) s_malloc((size_t) (sizeof(char *) * 2)); + + else + query->ElementSetNames = (char **) s_realloc((char *) query->ElementSetNames, + (size_t) (sizeof(char + *) * + (i + 2))); + + query->ElementSetNames[i] = s_strdup(ptr); + query->ElementSetNames[i + 1] = NULL; + } + } + query->ReferenceID = duplicateAny(refID); + query->Query = queryInfo; /* not copied! */ + return (query); +} + +/*----------------------------------------------------------------------*/ + +void freeSearchAPDU(SearchAPDU *query) +{ + s_free(query->ResultSetName); + s_free(query->QueryType); + doList((void **) query->DatabaseNames, fs_free); /* can't use the macro here ! */ + s_free(query->DatabaseNames); + doList((void **) query->ElementSetNames, fs_free); /* can't use the macro here ! 
*/ + s_free(query->ElementSetNames); + freeAny(query->ReferenceID); + s_free(query); +} + +/*----------------------------------------------------------------------*/ + +#define DB_DELIMITER "\037" /* hex 1F occurs between each database name */ +#define ES_DELIMITER_1 "\037" /* separates database name from element name */ +#define ES_DELIMITER_2 "\036" /* hex 1E separates <db,es> groups from one another */ + +char *writeSearchAPDU(SearchAPDU *query, char *buffer, long *len) +{ + char *buf = buffer + HEADER_LEN; /* leave room for the header-length-indicator */ + long size, i; + char *ptr = NULL; + char *scratch = NULL; + + RESERVE_SPACE_FOR_HEADER(len); + + buf = writePDUType(query->PDUType, buf, len); + buf = writeBinaryInteger(query->SmallSetUpperBound, (size_t) 3, buf, len); + buf = writeBinaryInteger(query->LargeSetLowerBound, (size_t) 3, buf, len); + buf = writeBinaryInteger(query->MediumSetPresentNumber, (size_t) 3, buf, len); + buf = writeBoolean(query->ReplaceIndicator, buf, len); + buf = writeString(query->ResultSetName, DT_ResultSetName, buf, len); + /* write database names */ + if (query->DatabaseNames != NULL) { + for (i = 0, scratch = NULL, ptr = query->DatabaseNames[i]; ptr != NULL; + ptr = query->DatabaseNames[++i]) { + if (scratch == NULL) + scratch = s_strdup(ptr); + else { + size_t newScratchSize = (size_t) (strlen(scratch) + + strlen(ptr) + 2); + + scratch = (char *) s_realloc(scratch, newScratchSize); + s_strncat(scratch, DB_DELIMITER, 2, newScratchSize); + s_strncat(scratch, ptr, strlen(ptr) + 1, newScratchSize); + } + } + buf = writeString(scratch, DT_DatabaseNames, buf, len); + s_free(scratch); + } + buf = writeString(query->QueryType, DT_QueryType, buf, len); + /* write element set names */ + if (query->ElementSetNames != NULL) { + for (i = 0, scratch = NULL, ptr = query->ElementSetNames[i]; + ptr != NULL; + ptr = query->ElementSetNames[++i]) { + if (scratch == NULL) { + if (query->ElementSetNames[i + 1] == NULL) /* there is a single element 
set name */ + { + scratch = (char *) s_malloc((size_t) strlen(ptr) + 2); + StrNCpy(scratch, ES_DELIMITER_1, 2); + s_strncat(scratch, ptr, strlen(ptr) + 1, strlen(ptr) + 2); + } else { /* this is the first of a series of element set names */ + size_t newScratchSize = (size_t) (strlen(ptr) + + strlen(query->ElementSetNames[i + + 1]) + + 2); + + scratch = s_strdup(ptr); /* the database name */ + ptr = query->ElementSetNames[++i]; /* the element set name */ + scratch = (char *) s_realloc(scratch, newScratchSize); + s_strncat(scratch, ES_DELIMITER_1, 2, newScratchSize); + s_strncat(scratch, ptr, strlen(ptr) + 1, newScratchSize); + } + } else { + char *esPtr = query->ElementSetNames[++i]; /* the element set name */ + size_t newScratchSize = (size_t) (strlen(scratch) + + strlen(ptr) + + strlen(esPtr) + + 3); + + scratch = (char *) s_realloc(scratch, newScratchSize); + s_strncat(scratch, ES_DELIMITER_2, 2, newScratchSize); + s_strncat(scratch, ptr, strlen(ptr) + 1, newScratchSize); + s_strncat(scratch, ES_DELIMITER_1, 2, newScratchSize); + s_strncat(scratch, esPtr, strlen(esPtr) + 1, newScratchSize); + } + } + buf = writeString(scratch, DT_ElementSetNames, buf, len); + s_free(scratch); + } + buf = writeAny(query->ReferenceID, DT_ReferenceID, buf, len); + + /* go back and write the header-length-indicator */ + RELEASE_HEADER_SPACE(len); + size = buf - buffer - HEADER_LEN; + writeBinaryInteger(size, HEADER_LEN, buffer, len); + + if (query->Query != NULL) + buf = writeSearchInfo(query, buf, len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +SearchResponseAPDU *makeSearchResponseAPDU(long result, + long count, + long recordsReturned, + long nextPos, + long resultStatus, + long presentStatus, + any *refID, + void *records) +{ + SearchResponseAPDU *query = + (SearchResponseAPDU *) s_malloc((size_t) sizeof(SearchResponseAPDU)); + + query->PDUType = searchResponseAPDU; + query->SearchStatus = result; + query->ResultCount = 
count; + query->NumberOfRecordsReturned = recordsReturned; + query->NextResultSetPosition = nextPos; + query->ResultSetStatus = resultStatus; + query->PresentStatus = presentStatus; + query->ReferenceID = duplicateAny(refID); + query->DatabaseDiagnosticRecords = records; + return (query); +} + +/*----------------------------------------------------------------------*/ + +void freeSearchResponseAPDU(SearchResponseAPDU *queryResponse) +{ + freeAny(queryResponse->ReferenceID); + s_free(queryResponse); +} + +/*----------------------------------------------------------------------*/ + +char *writeSearchResponseAPDU(SearchResponseAPDU *queryResponse, char *buffer, + long *len) +{ + char *buf = buffer + HEADER_LEN; /* leave room for the header-length-indicator */ + long size; + + RESERVE_SPACE_FOR_HEADER(len); + + buf = writePDUType(queryResponse->PDUType, + buf, + len); + buf = writeBinaryInteger(queryResponse->SearchStatus, + (size_t) 1, + buf, + len); + buf = writeBinaryInteger(queryResponse->ResultCount, + (size_t) 3, + buf, + len); + buf = writeBinaryInteger(queryResponse->NumberOfRecordsReturned, + (size_t) 3, + buf, + len); + buf = writeBinaryInteger(queryResponse->NextResultSetPosition, + (size_t) 3, + buf, + len); + buf = writeNum(queryResponse->ResultSetStatus, + DT_ResultSetStatus, + buf, + len); + buf = writeNum(queryResponse->PresentStatus, + DT_PresentStatus, + buf, + len); + buf = writeAny(queryResponse->ReferenceID, + DT_ReferenceID, + buf, + len); + + /* go back and write the header-length-indicator */ + RELEASE_HEADER_SPACE(len); + size = buf - buffer - HEADER_LEN; + writeBinaryInteger(size, HEADER_LEN, buffer, len); + + if (queryResponse->DatabaseDiagnosticRecords != NULL) + buf = writeSearchResponseInfo(queryResponse, buf, len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readSearchResponseAPDU(SearchResponseAPDU **queryResponse, char *buffer) +{ + char *buf = buffer; + long size; + 
pdu_type pduType; + long result, count, recordsReturned, nextPos; + long resultStatus, presentStatus; + any *refID = NULL; + void *userInfo = NULL; + + /* read required part */ + buf = readBinaryInteger(&size, HEADER_LEN, buf); + buf = readPDUType(&pduType, buf); + buf = readBinaryInteger(&result, (size_t) 1, buf); + buf = readBinaryInteger(&count, (size_t) 3, buf); + buf = readBinaryInteger(&recordsReturned, (size_t) 3, buf); + buf = readBinaryInteger(&nextPos, (size_t) 3, buf); + + resultStatus = presentStatus = UNUSED; + refID = NULL; + + /* read optional part */ + while (buf < (buffer + size + HEADER_LEN)) { + data_tag tag = peekTag(buf); + + switch (tag) { + case DT_ResultSetStatus: + buf = readNum(&resultStatus, buf); + break; + case DT_PresentStatus: + buf = readNum(&presentStatus, buf); + break; + case DT_ReferenceID: + buf = readAny(&refID, buf); + break; + default: + freeAny(refID); + REPORT_READ_ERROR(buf); + break; + } + } + + buf = readSearchResponseInfo(&userInfo, buf); + if (buf == NULL) + freeAny(refID); + RETURN_ON_NULL(buf); + + /* construct the search object */ + *queryResponse = makeSearchResponseAPDU(result, + count, + recordsReturned, + nextPos, + (long) resultStatus, + (long) presentStatus, + refID, + userInfo); + + freeAny(refID); + + return (buf); +} + +/* + * Routines originally from ZUtil.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * + * 3.26.90 Harry Morris, morris@think.com + * 3.30.90 Harry Morris - Changed any->bits to any->bytes + * 4.11.90 HWM - fixed include file names, changed + * - writeCompressedIntegerWithPadding() to + * writeCompressedIntWithPadding() + * - generalized conditional includes (see c-dialect.h) + * 3.7.91 Jonny Goldman. Replaced "short" in makeBitMap with "int" line 632. 
+ */ + +char *readErrorPosition = NULL; /* pos where buf stoped making sense */ + +/*----------------------------------------------------------------------*/ +/* A note on error handling + read - these are low level routines, they do not check the type tags + which (sometimes) preceed the data (this is done by the higher + level functions which call these functions). There is no + attempt made to check that the reading does not exceed the read + buffer. Such cases should be very rare and usually will be + caught by the calling functions. (note - it is unlikely that + a series of low level reads will go far off the edge without + triggering a type error. However, it is possible for a single + bad read in an array function (eg. readAny) to attempt to read a + large ammount, possibly causing a segmentation violation or out + of memory condition. + */ +/*----------------------------------------------------------------------*/ + +diagnosticRecord *makeDiag(boolean surrogate, char *code, char *addInfo) +{ + diagnosticRecord *diag = + (diagnosticRecord *) s_malloc((size_t) sizeof(diagnosticRecord)); + + diag->SURROGATE = surrogate; + MemCpy(diag->DIAG, code, DIAGNOSTIC_CODE_SIZE); + diag->ADDINFO = s_strdup(addInfo); + + return (diag); +} + +/*----------------------------------------------------------------------*/ + +void freeDiag(diagnosticRecord * diag) +{ + if (diag != NULL) { + if (diag->ADDINFO != NULL) + s_free(diag->ADDINFO); + s_free(diag); + } +} + +/*----------------------------------------------------------------------*/ + +#define END_OF_RECORD 0x1D + +char *writeDiag(diagnosticRecord * diag, char *buffer, long *len) +/* diagnostics (as per Appendix D) have a very weird format - this changes + in SR-1 + */ +{ + char *buf = buffer; + long length; + + if (diag == NULL) /* handle unspecified optional args */ + return (buf); + + buf = writeTag(DT_DatabaseDiagnosticRecords, buf, len); + CHECK_FOR_SPACE_LEFT(0, len); + + length = 3; + if (diag->ADDINFO != NULL) + 
length += strlen(diag->ADDINFO); + + if (length >= 0xFFFF) /* make sure the length is reasonable */ + { + length = 0xFFFF - 1; + diag->ADDINFO[0xFFFF - 3 - 1] = '\0'; + } + + buf = writeBinaryInteger(length, 2, buf, len); + + CHECK_FOR_SPACE_LEFT(1, len); + buf[0] = diag->DIAG[0]; + buf++; + + CHECK_FOR_SPACE_LEFT(1, len); + buf[0] = diag->DIAG[1]; + buf++; + + if (length > 3) { + CHECK_FOR_SPACE_LEFT(3, len); + MemCpy(buf, diag->ADDINFO, length - 3); + buf += length - 3; + } + + CHECK_FOR_SPACE_LEFT(1, len); + buf[0] = diag->SURROGATE; + buf++; + + CHECK_FOR_SPACE_LEFT(1, len); + buf[0] = END_OF_RECORD; + buf++; + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readDiag(diagnosticRecord ** diag, char *buffer) +{ + char *buf = buffer; + diagnosticRecord *d = (diagnosticRecord *) s_malloc((size_t) sizeof(diagnosticRecord)); + data_tag tag; + long len; + + buf = readTag(&tag, buf); + + buf = readBinaryInteger(&len, 2, buf); + + d->DIAG[0] = buf[0]; + d->DIAG[1] = buf[1]; + d->DIAG[2] = '\0'; + + if (len > 3) { + d->ADDINFO = (char *) s_malloc((size_t) (len - 3 + 1)); + MemCpy(d->ADDINFO, (char *) (buf + 2), len - 3); + d->ADDINFO[len - 3] = '\0'; + } else + d->ADDINFO = NULL; + + d->SURROGATE = buf[len - 1]; + + *diag = d; + + return (buf + len + 1); +} + +/*----------------------------------------------------------------------*/ + +#define continueBit 0x80 +#define dataMask 0x7F +#define dataBits 7 + +char *writeCompressedInteger(unsigned long num, char *buf, long *len) +/* write a binary integer in the format described on p. 40. 
+ this might be sped up +*/ +{ + char byte; + unsigned long i; + unsigned long size; + + size = writtenCompressedIntSize(num); + CHECK_FOR_SPACE_LEFT(size, len); + + for (i = size - 1; i != 0; i--) { + byte = num & dataMask; + if (i != (size - 1)) /* turn on continue bit */ + byte = (char) (byte | continueBit); + buf[i] = byte; + num = num >> dataBits; /* don't and here */ + } + + return (buf + size); +} + +/*----------------------------------------------------------------------*/ + +char *readCompressedInteger(unsigned long *num, char *buf) +/* read a binary integer in the format described on p. 40. + this might be sped up +*/ +{ + long i = 0; + unsigned char byte; + + *num = 0; + + do { + byte = buf[i++]; + *num = *num << dataBits; + *num += (byte & dataMask); + } + while (byte & continueBit); + + return (buf + i); +} + +/*----------------------------------------------------------------------*/ + +#define pad 128 /* high bit is set */ + +char *writeCompressedIntWithPadding(unsigned long num, + unsigned long size, + char *buffer, + long *len) +/* Like writeCompressedInteger, except writes padding (128) to make + sure that size bytes are used. This can be read correctly by + readCompressedInteger() +*/ +{ + char *buf = buffer; + unsigned long needed, padding; + long i; + + CHECK_FOR_SPACE_LEFT(size, len); + + needed = writtenCompressedIntSize(num); + padding = size - needed; + i = padding - 1; + + for (i = padding - 1; i >= 0; i--) { + buf[i] = pad; + } + + buf = writeCompressedInteger(num, buf + padding, len); + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenCompressedIntSize(unsigned long num) +/* return the number of bytes needed to represnet the value num in + compressed format. 
curently limited to 4 bytes + */ +{ + if (num < CompressedInt1Byte) + return (1); + else if (num < CompressedInt2Byte) + return (2); + else if (num < CompressedInt3Byte) + return (3); + else + return (4); +} + +/*----------------------------------------------------------------------*/ + +char *writeTag(data_tag tag, char *buf, long *len) +/* write out a data tag */ +{ + return (writeCompressedInteger(tag, buf, len)); +} + +/*----------------------------------------------------------------------*/ + +char *readTag(data_tag *tag, char *buf) +/* read a data tag */ +{ + return (readCompressedInteger(tag, buf)); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenTagSize(data_tag tag) +{ + return (writtenCompressedIntSize(tag)); +} + +/*----------------------------------------------------------------------*/ + +data_tag peekTag(char *buf) +/* read a data tag without advancing the buffer */ +{ + data_tag tag; + + readTag(&tag, buf); + return (tag); +} + +/*----------------------------------------------------------------------*/ + +any *makeAny(unsigned long size, char *data) +{ + any *a = (any *) s_malloc((size_t) sizeof(any)); + + a->size = size; + a->bytes = data; + return (a); +} + +/*----------------------------------------------------------------------*/ + +void freeAny(any *a) +/* destroy an any and its associated data. 
Assumes a->bytes was + allocated using the s_malloc family of libraries + */ +{ + if (a != NULL) { + if (a->bytes != NULL) + s_free(a->bytes); + s_free(a); + } +} + +/*----------------------------------------------------------------------*/ + +any *duplicateAny(any *a) +{ + any *copy = NULL; + + if (a == NULL) + return (NULL); + + copy = (any *) s_malloc((size_t) sizeof(any)); + + copy->size = a->size; + if (a->bytes == NULL) + copy->bytes = NULL; + else { + copy->bytes = (char *) s_malloc((size_t) copy->size); + MemCpy(copy->bytes, a->bytes, copy->size); + } + return (copy); +} + +/*----------------------------------------------------------------------*/ + +char *writeAny(any *a, data_tag tag, char *buffer, long *len) +/* write an any + tag and size info */ +{ + char *buf = buffer; + + if (a == NULL) /* handle unspecified optional args */ + return (buf); + + /* write the tags */ + buf = writeTag(tag, buf, len); + buf = writeCompressedInteger(a->size, buf, len); + + /* write the bytes */ + CHECK_FOR_SPACE_LEFT(a->size, len); + MemCpy(buf, a->bytes, a->size); + + return (buf + a->size); +} + +/*----------------------------------------------------------------------*/ + +char *readAny(any **anAny, char *buffer) +/* read an any + tag and size info */ +{ + char *buf; + any *a; + data_tag tag; + + a = (any *) s_malloc((size_t) sizeof(any)); + + buf = buffer; + + buf = readTag(&tag, buf); + + buf = readCompressedInteger(&a->size, buf); + + /* now simply copy the bytes */ + a->bytes = (char *) s_malloc((size_t) a->size); + MemCpy(a->bytes, buf, a->size); + *anAny = a; + + return (buf + a->size); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenAnySize(data_tag tag, any *a) +{ + unsigned long size; + + if (a == NULL) + return (0); + + size = writtenTagSize(tag); + size += writtenCompressedIntSize(a->size); + size += a->size; + return (size); +} + 
+/*----------------------------------------------------------------------*/ + +any *stringToAny(char *s) +{ + any *a = NULL; + + if (s == NULL) + return (NULL); + + a = (any *) s_malloc((size_t) sizeof(any)); + + a->size = strlen(s); + a->bytes = (char *) s_malloc((size_t) a->size); + MemCpy(a->bytes, s, a->size); + return (a); +} + +/*----------------------------------------------------------------------*/ + +char *anyToString(any *a) +{ + char *s = NULL; + + if (a == NULL) + return (NULL); + + s = s_malloc((size_t) (a->size + 1)); + MemCpy(s, a->bytes, a->size); + s[a->size] = '\0'; + return (s); +} + +/*----------------------------------------------------------------------*/ + +char *writeString(char *s, data_tag tag, char *buffer, long *len) +/* Write a C style string. The terminating null is not written. + This function is not part of the Z39.50 spec. It is provided + for the convienience of those wishing to pass C strings in + the place of an any. + */ +{ + char *buf = buffer; + any *data = NULL; + + if (s == NULL) + return (buffer); /* handle unused optional item before making an any */ + data = (any *) s_malloc((size_t) sizeof(any)); + + data->size = strlen(s); + data->bytes = s; /* save a copy here by not using stringToAny() */ + buf = writeAny(data, tag, buf, len); + s_free(data); /* don't use freeAny() since it will free s too */ + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readString(char **s, char *buffer) +/* Read an any and convert it into a C style string. + This function is not part of the Z39.50 spec. It is provided + for the convienience of those wishing to pass C strings in + the place of an any. 
+ */ +{ + any *data = NULL; + char *buf = readAny(&data, buffer); + + *s = anyToString(data); + freeAny(data); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenStringSize(data_tag tag, char *s) +{ + unsigned long size; + + if (s == NULL) + return (0); + + size = writtenTagSize(tag); + size += writtenCompressedIntSize(size); + size += strlen(s); + return (size); +} + +/*----------------------------------------------------------------------*/ + +any *longToAny(long num) +/* a convienience function */ +{ + char s[40]; + + sprintf(s, "%ld", num); + + return (stringToAny(s)); +} + +/*----------------------------------------------------------------------*/ + +long anyToLong(any *a) +/* a convienience function */ +{ + long num; + char *str = NULL; + + str = anyToString(a); + sscanf(str, "%ld", &num); /* could check the result and return + an error */ + s_free(str); + return (num); +} + +/*----------------------------------------------------------------------*/ + +#define bitsPerByte 8 + +bit_map *makeBitMap(unsigned long numBits,...) 
+/* construct and return a bitmap with numBits elements */ +{ + va_list ap; + unsigned long i, j; + bit_map *bm = NULL; + + LYva_start(ap, numBits); + + bm = (bit_map *) s_malloc((size_t) sizeof(bit_map)); + + bm->size = (unsigned long) (ceil((double) numBits / bitsPerByte)); + bm->bytes = (char *) s_malloc((size_t) bm->size); + + /* fill up the bits */ + for (i = 0; i < bm->size; i++) /* iterate over bytes */ + { + char byte = 0; + + for (j = 0; j < bitsPerByte; j++) /* iterate over bits */ + { + if ((i * bitsPerByte + j) < numBits) { + boolean bit = false; + + bit = (boolean) va_arg(ap, boolean); + + if (bit) { + byte = byte | (1 << (bitsPerByte - j - 1)); + } + } + } + bm->bytes[i] = byte; + } + + va_end(ap); + return (bm); +} + +/*----------------------------------------------------------------------*/ + +void freeBitMap(bit_map *bm) +/* destroy a bit map created by makeBitMap() */ +{ + s_free(bm->bytes); + s_free(bm); +} + +/*----------------------------------------------------------------------*/ + +/* use this routine to interpret a bit map. pos specifies the bit + number. bit 0 is the Leftmost bit of the first byte. + Could do bounds checking. + */ + +boolean bitAtPos(unsigned long pos, bit_map *bm) +{ + if (pos > bm->size * bitsPerByte) + return false; + else + return ((bm->bytes[(pos / bitsPerByte)] & + (0x80 >> (pos % bitsPerByte))) ? 
+ true : false); +} + +/*----------------------------------------------------------------------*/ + +char *writeBitMap(bit_map *bm, data_tag tag, char *buffer, long *len) +/* write a bitmap + type and size info */ +{ + return (writeAny((any *) bm, tag, buffer, len)); +} + +/*----------------------------------------------------------------------*/ + +char *readBitMap(bit_map **bm, char *buffer) +/* read a bitmap + type and size info */ +{ + char *c; + + c = readAny((any **) bm, buffer); + return (c); +} + +/*----------------------------------------------------------------------*/ + +char *writeByte(unsigned long byte, char *buf, long *len) +{ + CHECK_FOR_SPACE_LEFT(1, len); + buf[0] = byte & 0xFF; /* we really only want the first byte */ + return (buf + 1); +} + +/*----------------------------------------------------------------------*/ + +char *readByte(unsigned char *byte, char *buf) +{ + *byte = buf[0]; + return (buf + 1); +} + +/*----------------------------------------------------------------------*/ + +char *writeBoolean(boolean flag, char *buf, long *len) +{ + return (writeByte(flag, buf, len)); +} + +/*----------------------------------------------------------------------*/ + +char *readBoolean(boolean *flag, char *buffer) +{ + unsigned char byte; + char *buf = readByte(&byte, buffer); + + *flag = (byte == true) ? 
true : false; + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *writePDUType(pdu_type pduType, char *buf, long *len) +/* PDUType is a single byte */ +{ + return (writeBinaryInteger((long) pduType, (unsigned long) 1, buf, len)); +} + +/*----------------------------------------------------------------------*/ + +char *readPDUType(pdu_type *pduType, char *buf) +/* PDUType is a single byte */ +{ + return (readBinaryInteger((long *) pduType, (unsigned long) 1, buf)); +} + +/*----------------------------------------------------------------------*/ + +pdu_type peekPDUType(char *buf) +/* read the next pdu without advancing the buffer, Note that this + function is to be used on a buffer that is known to contain an + APDU. The pdu_type is written HEADER_LEN bytes into the buffer + */ +{ + pdu_type pdu; + + readPDUType(&pdu, buf + HEADER_LEN); + return (pdu); +} + +/*----------------------------------------------------------------------*/ + +#define BINARY_INTEGER_BYTES sizeof(long) /* the number of bytes used by + a "binary integer" */ +char *writeBinaryInteger(long num, unsigned long size, char *buf, long *len) +/* write out first size bytes of num - no type info + XXX should this take unsigned longs instead ??? */ +{ + long i; + char byte; + + if (size < 1 || size > BINARY_INTEGER_BYTES) + return (NULL); /* error */ + + CHECK_FOR_SPACE_LEFT(size, len); + + for (i = size - 1; i >= 0; i--) { + byte = (char) (num & 255); + buf[i] = byte; + num = num >> bitsPerByte; /* don't and here */ + } + + return (buf + size); +} + +/*----------------------------------------------------------------------*/ + +char *readBinaryInteger(long *num, unsigned long size, char *buf) +/* read in first size bytes of num - no type info + XXX this should take unsigned longs instead !!! 
*/ +{ + unsigned long i; + unsigned char byte; + + if (size < 1 || size > BINARY_INTEGER_BYTES) + return (buf); /* error */ + *num = 0; + + for (i = 0; i < size; i++) { + byte = buf[i]; + *num = *num << bitsPerByte; + *num += byte; + } + + return (buf + size); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenCompressedBinIntSize(long num) +/* return the number of bytes needed to represent the value num. + currently limited to max of 4 bytes + Only compresses for positive nums - negatives get whole 4 bytes + */ +{ + if (num < 0L) + return (4); + else if (num < 256L) /* 2**8 */ + return (1); + else if (num < 65536L) /* 2**16 */ + return (2); + else if (num < 16777216L) /* 2**24 */ + return (3); + else + return (4); +} + +/*----------------------------------------------------------------------*/ + +char *writeNum(long num, data_tag tag, char *buffer, long *len) +/* write a binary integer + size and tag info */ +{ + char *buf = buffer; + long size = writtenCompressedBinIntSize(num); + + if (num == UNUSED) + return (buffer); + + buf = writeTag(tag, buf, len); + buf = writeCompressedInteger(size, buf, len); + buf = writeBinaryInteger(num, (unsigned long) size, buf, len); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readNum(long *num, char *buffer) +/* read a binary integer + size and tag info */ +{ + char *buf = buffer; + data_tag tag; + unsigned long size; + unsigned long val; + + buf = readTag(&tag, buf); + buf = readCompressedInteger(&val, buf); + size = (unsigned long) val; + buf = readBinaryInteger(num, size, buf); + return (buf); +} + +/*----------------------------------------------------------------------*/ + +unsigned long writtenNumSize(data_tag tag, long num) +{ + long dataSize = writtenCompressedBinIntSize(num); + long size; + + size = writtenTagSize(tag); /* space for the tag */ + size += writtenCompressedIntSize(dataSize); /* space for the 
size */ + size += dataSize; /* space for the data */ + + return (size); +} + +/*----------------------------------------------------------------------*/ + +typedef void (voidfunc) (void *); + +void doList(void **list, voidfunc * func) +/* call func on each element of the NULL terminated list of pointers */ +{ + register long i; + register void *ptr = NULL; + + if (list == NULL) + return; + for (i = 0, ptr = list[i]; ptr != NULL; ptr = list[++i]) + (*func) (ptr); +} + +/*----------------------------------------------------------------------*/ + +char *writeProtocolVersion(char *buf, long *len) +/* write a bitmap describing the protocols available */ +{ + static bit_map *version = NULL; + + if (version == NULL) { + version = makeBitMap((unsigned long) 1, true); /* version 1! */ + } + + return (writeBitMap(version, DT_ProtocolVersion, buf, len)); +} + +/*----------------------------------------------------------------------*/ + +char *defaultImplementationID(void) +{ + static char ImplementationID[] = "TMC"; + + return (ImplementationID); +} + +/*----------------------------------------------------------------------*/ + +char *defaultImplementationName(void) +{ + static char ImplementationName[] = "Thinking Machines Corporation Z39.50"; + + return (ImplementationName); +} + +/*----------------------------------------------------------------------*/ + +char *defaultImplementationVersion(void) +{ + static char ImplementationVersion[] = "2.0A"; + + return (ImplementationVersion); +} + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from ZType1.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. 
+ * + * 3.26.90 Harry Morris, morris@think.com + * 4.11.90 HWM - generalized conditional includes (see c-dialect.h) + */ +/*----------------------------------------------------------------------*/ + +query_term *makeAttributeTerm(char *use, + char *relation, + char *position, + char *structure, + char *truncation, + char *completeness, + any *term) +{ + query_term *qt = (query_term *) s_malloc((size_t) sizeof(query_term)); + + qt->TermType = TT_Attribute; + + /* copy in the attributes */ + StrNCpy(qt->Use, use, ATTRIBUTE_SIZE); + StrNCpy(qt->Relation, relation, ATTRIBUTE_SIZE); + StrNCpy(qt->Position, position, ATTRIBUTE_SIZE); + StrNCpy(qt->Structure, structure, ATTRIBUTE_SIZE); + StrNCpy(qt->Truncation, truncation, ATTRIBUTE_SIZE); + StrNCpy(qt->Completeness, completeness, ATTRIBUTE_SIZE); + + qt->Term = duplicateAny(term); + + qt->ResultSetID = NULL; + + return (qt); +} + +/*----------------------------------------------------------------------*/ + +query_term *makeResultSetTerm(any *resultSet) +{ + query_term *qt = (query_term *) s_malloc((size_t) sizeof(query_term)); + + qt->TermType = TT_ResultSetID; + + qt->ResultSetID = duplicateAny(resultSet); + + qt->Term = NULL; + + return (qt); +} + +/*----------------------------------------------------------------------*/ + +query_term *makeOperatorTerm(char *operatorCode) +{ + query_term *qt = (query_term *) s_malloc((size_t) sizeof(query_term)); + + qt->TermType = TT_Operator; + + StrNCpy(qt->Operator, operatorCode, OPERATOR_SIZE); + + qt->Term = NULL; + qt->ResultSetID = NULL; + + return (qt); +} + +/*----------------------------------------------------------------------*/ + +void freeTerm(void *param) +{ + query_term *qt = (query_term *) param; + + switch (qt->TermType) { + case TT_Attribute: + freeAny(qt->Term); + break; + case TT_ResultSetID: + freeAny(qt->ResultSetID); + break; + case TT_Operator: + /* do nothing */ + break; + default: + panic("Implementation error: Unknown term type %ld", + qt->TermType); + 
break; + } + s_free(qt); +} + +/*----------------------------------------------------------------------*/ + +#define ATTRIBUTE_LIST_SIZE ATTRIBUTE_SIZE * 6 +#define AT_DELIMITER " " + +char *writeQueryTerm(query_term *qt, char *buffer, long *len) +{ + char *buf = buffer; + char attributes[ATTRIBUTE_LIST_SIZE]; + + switch (qt->TermType) { + case TT_Attribute: + StrNCpy(attributes, qt->Use, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, AT_DELIMITER, sizeof(AT_DELIMITER) + 1, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, qt->Relation, ATTRIBUTE_SIZE, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, AT_DELIMITER, sizeof(AT_DELIMITER) + 1, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, qt->Position, ATTRIBUTE_SIZE, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, AT_DELIMITER, sizeof(AT_DELIMITER) + 1, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, qt->Structure, ATTRIBUTE_SIZE, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, AT_DELIMITER, sizeof(AT_DELIMITER) + 1, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, qt->Truncation, ATTRIBUTE_SIZE, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, AT_DELIMITER, sizeof(AT_DELIMITER) + 1, ATTRIBUTE_LIST_SIZE); + s_strncat(attributes, qt->Completeness, ATTRIBUTE_SIZE, ATTRIBUTE_LIST_SIZE); + buf = writeString(attributes, DT_AttributeList, buf, len); + buf = writeAny(qt->Term, DT_Term, buf, len); + break; + case TT_ResultSetID: + buf = writeAny(qt->ResultSetID, DT_ResultSetID, buf, len); + break; + case TT_Operator: + buf = writeString(qt->Operator, DT_Operator, buf, len); + break; + default: + panic("Implementation error: Unknown term type %ld", + qt->TermType); + break; + } + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +char *readQueryTerm(query_term **qt, char *buffer) +{ + char *buf = buffer; + char *attributeList = NULL; + char *operator = NULL; + any *term; + char *use = NULL; + char *relation = NULL; + char *position = NULL; + char *structure = NULL; + char *truncation = NULL; + char 
*completeness; + any *resultSetID = NULL; + data_tag tag; + + tag = peekTag(buffer); + + switch (tag) { + case DT_AttributeList: + buf = readString(&attributeList, buf); + buf = readAny(&term, buf); + use = strtok(attributeList, AT_DELIMITER); + relation = strtok(NULL, AT_DELIMITER); + position = strtok(NULL, AT_DELIMITER); + structure = strtok(NULL, AT_DELIMITER); + truncation = strtok(NULL, AT_DELIMITER); + completeness = strtok(NULL, AT_DELIMITER); + *qt = makeAttributeTerm(use, relation, position, structure, + truncation, completeness, term); + s_free(attributeList); + freeAny(term); + break; + case DT_ResultSetID: + buf = readAny(&resultSetID, buf); + *qt = makeResultSetTerm(resultSetID); + freeAny(resultSetID); + break; + case DT_Operator: + buf = readString(&operator, buf); + *qt = makeOperatorTerm(operator); + s_free(operator); + break; + default: + REPORT_READ_ERROR(buf); + break; + } + + return (buf); +} + +/*----------------------------------------------------------------------*/ + +static unsigned long getQueryTermSize(query_term *qt); + +static unsigned long getQueryTermSize(query_term *qt) +/* figure out how many bytes it will take to write this query */ +{ + unsigned long size = 0; + static char attributes[] = "11 22 33 44 55 66"; /* we just need this to + + calculate its written + size */ + + switch (qt->TermType) { + case TT_Attribute: + size = writtenStringSize(DT_AttributeList, attributes); + size += writtenAnySize(DT_Term, qt->Term); + break; + case TT_ResultSetID: + size = writtenAnySize(DT_ResultSetID, qt->ResultSetID); + break; + case TT_Operator: + size = writtenStringSize(DT_Operator, qt->Operator); + break; + default: + panic("Implementation error: Unknown term type %ld", + qt->TermType); + break; + } + + return (size); +} + +/*----------------------------------------------------------------------*/ + +/* A query is simply a null terminated list of query terms. 
For + transmission, a query is written into an any which is sent as + the user information field. */ + +any *writeQuery(query_term **terms) +{ + any *info = NULL; + char *writePos = NULL; + char *data = NULL; + unsigned long size = 0; + long remaining = 0; + long i; + query_term *qt = NULL; + + if (terms == NULL) + return (NULL); + + /* calculate the size of write buffer */ + for (i = 0, qt = terms[i]; qt != NULL; qt = terms[++i]) + size += getQueryTermSize(qt); + + data = (char *) s_malloc((size_t) size); + + /* write the terms */ + writePos = data; + remaining = size; + for (i = 0, qt = terms[i]; qt != NULL; qt = terms[++i]) + writePos = writeQueryTerm(qt, writePos, &remaining); + + info = makeAny(size, data); + + return (info); +} + +/*----------------------------------------------------------------------*/ + +query_term **readQuery(any *info) +{ + char *readPos = info->bytes; + query_term **terms = NULL; + query_term *qt = NULL; + long numTerms = 0L; + char tmp[100]; + + sprintf(tmp, "readquery: bytes: %ld", info->size); + log_write(tmp); + + while (readPos < info->bytes + info->size) { + readPos = readQueryTerm(&qt, readPos); + + if (terms == NULL) { + terms = (query_term **) s_malloc((size_t) (sizeof(query_term *) * 2)); + } else { + terms = + (query_term **) s_realloc((char *) terms, + (size_t) (sizeof(query_term *) * + (numTerms + 2))); + } + if (qt == NULL) + log_write("qt = null"); + terms[numTerms++] = qt; + terms[numTerms] = NULL; + } + + return (terms); +} + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from panic.c -- FM + * + *----------------------------------------------------------------------*/ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * + * Morris@think.com + */ + +/* panic is an error system interface. On the Mac, it will pop + * up a little window to explain the problem. 
+ * On a unix box, it will print out the error and call perror() + */ + +/*----------------------------------------------------------------------*/ + +static void exitAction(long error); + +static void exitAction(long error GCC_UNUSED) +{ + exit_immediately(EXIT_SUCCESS); +} + +/*----------------------------------------------------------------------*/ + +#define PANIC_HEADER "Fatal Error: " + +void panic(char *format,...) +{ + va_list ap; /* the variable arguments */ + + fprintf(stderr, PANIC_HEADER); + LYva_start(ap, format); /* init ap */ + vfprintf(stderr, format, ap); /* print the contents */ + va_end(ap); /* free ap */ + fflush(stderr); + + exitAction(0); +} + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from cutil.c -- FM + * + *----------------------------------------------------------------------*/ +/* Wide AREA INFORMATION SERVER SOFTWARE + * No guarantees or restrictions. See the readme file for the full standard + * disclaimer. + * + * 3.26.90 Harry Morris, morris@think.com + * 4.11.90 HWM - generalized conditional includes (see c-dialect.h) + */ + +/*----------------------------------------------------------------------*/ + +void fs_checkPtr(void *ptr) +/* If the ptr is NULL, give an error */ +{ + if (ptr == NULL) + panic("checkPtr found a NULL pointer"); +} + +/*----------------------------------------------------------------------*/ + +void *fs_malloc(size_t size) +/* does safety checks and optional accounting */ +{ + register void *ptr = NULL; + + ptr = (void *) calloc((size_t) size, (size_t) 1); + s_checkPtr(ptr); + + return (ptr); +} + +/*----------------------------------------------------------------------*/ + +void *fs_realloc(void *ptr, size_t size) +/* does safety checks and optional accounting + note - we don't know how big ptr's memory is, so we can't ensure + that any new memory allocated is NULLed! 
+ */ +{ + register void *nptr = NULL; + + if (ptr == NULL) /* this is really a malloc */ + return (s_malloc(size)); + + nptr = (void *) realloc(ptr, size); + s_checkPtr(ptr); + + return (nptr); +} + +/*----------------------------------------------------------------------*/ + +void fs_free(void *ptr) +/* does safety checks and optional accounting */ +{ + if (ptr != NULL) /* some non-ansi compilers/os's cant handle freeing null */ + { /* if we knew the size of this block of memory, we could clear it - oh well */ + free(ptr); + ptr = NULL; + } +} + +/*----------------------------------------------------------------------*/ + +char *s_strdup(char *s) + +/* return a copy of s. This is identical to the standard library routine + strdup(), except that it is safe. If s == NULL or malloc fails, + appropriate action is taken. + */ +{ + unsigned long len; + char *copy = NULL; + + if (s == NULL) /* saftey check to postpone stupid errors */ + return (NULL); + + len = strlen(s); /* length of string - terminator */ + copy = (char *) s_malloc((size_t) (sizeof(char) * (len + 1))); + + StrNCpy(copy, s, len + 1); + return (copy); +} + +/*----------------------------------------------------------------------*/ + +char *fs_strncat(char *dst, char *src, size_t maxToAdd, size_t maxTotal) + +/* like strncat, except the fourth argument limits the maximum total + length of the resulting string + */ +{ + size_t dstSize = strlen(dst); + size_t srcSize = strlen(src); + + if (dstSize + srcSize < maxTotal) /* use regular old strncat */ + return (StrNCat(dst, src, maxToAdd)); + else { + size_t truncateTo = maxTotal - dstSize - 1; + char saveChar = src[truncateTo]; + char *result = NULL; + + src[truncateTo] = '\0'; + result = StrNCat(dst, src, maxToAdd); + src[truncateTo] = saveChar; + return (result); + } +} + +/*----------------------------------------------------------------------*/ + +char char_downcase(unsigned long long_ch) +{ + unsigned char ch = long_ch & 0xFF; /* just want one byte */ + 
+ /* when ansi is the way of the world, this can be tolower */ + return (((ch >= 'A') && (ch <= 'Z')) ? (ch + 'a' - 'A') : ch); +} + +char *string_downcase(char *word) +{ + long i = 0; + + while (word[i] != '\0') { + word[i] = char_downcase((unsigned long) word[i]); + i++; + } + return (word); +} + +/*----------------------------------------------------------------------*/ +#endif /* VMS */ diff --git a/WWW/Library/Implementation/HTVMS_WaisUI.h b/WWW/Library/Implementation/HTVMS_WaisUI.h new file mode 100644 index 00000000..4f072a51 --- /dev/null +++ b/WWW/Library/Implementation/HTVMS_WaisUI.h @@ -0,0 +1,664 @@ +/* HTVMS_WAISUI.h + * + * Adaptation for Lynx by F.Macrides (macrides@sci.wfeb.edu) + * + * 31-May-1994 FM Initial version. + */ + +#ifndef HTVMSWAIS_H +#define HTVMSWAIS_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + void log_write(char *); + +/* + * Routines originally from Panic.h -- FM + * + *----------------------------------------------------------------------*/ + + void panic(char *format,...); + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from CUtil.h -- FM + * + *----------------------------------------------------------------------*/ + +/* types and constants */ + +#ifndef boolean +#define boolean unsigned long +#endif /* boolean */ + +#ifndef true +#define true (boolean)1L +#endif /* true */ + +#ifndef false +#define false (boolean)0L /* used to be (!true), but broke + some compilers */ +#endif /* false */ + +#ifndef TRUE +#define TRUE true +#endif /* TRUE */ + +#ifndef FALSE +#define FALSE false +#endif /* FALSE */ + +/*----------------------------------------------------------------------*/ +/* functions */ + +/* enhanced memory handling functions - don't call them directly, use the + macros below */ + void fs_checkPtr(void *ptr); + void *fs_malloc(size_t size); + void *fs_realloc(void *ptr, size_t size); + void fs_free(void *ptr); 
+ char *fs_strncat(char *dst, char *src, size_t maxToAdd, size_t maxTotal); + +/* macros for memory functions. call these in your program. */ +#define s_checkPtr(ptr) fs_checkPtr(ptr) +#define s_malloc(size) fs_malloc(size) +#define s_realloc(ptr,size) fs_realloc((ptr),(size)) +#define s_free(ptr) { fs_free((char*)ptr); ptr = NULL; } +#define s_strncat(dst,src,maxToAdd,maxTotal) fs_strncat((dst),(src),(maxToAdd),(maxTotal)) + + char *s_strdup(char *s); + +#define IS_DELIMITER 1 +#define NOT_DELIMITER !IS_DELIMITER + + char char_downcase(unsigned long ch); + char *string_downcase(char *word); + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from ZUtil.c -- FM + * + *----------------------------------------------------------------------*/ + +/* Data types / constants */ + +/* bytes to leave for the header size info */ +#define HEADER_LEN (size_t)2 + + typedef long pdu_type; + +#define initAPDU (pdu_type)20 +#define initResponseAPDU (pdu_type)21 +#define searchAPDU (pdu_type)22 +#define searchResponseAPDU (pdu_type)23 +#define presentAPDU (pdu_type)24 +#define presentResponseAPDU (pdu_type)25 +#define deteteAPDU (pdu_type)26 +#define deleteResponseAPDU (pdu_type)27 +#define accessControlAPDU (pdu_type)28 +#define accessControlResponseAPDU (pdu_type)29 +#define resourceControlAPDU (pdu_type)30 +#define resourceControlResponseAPDU (pdu_type)31 + + typedef struct any { /* an any is a non-ascii string of characters */ + unsigned long size; + char *bytes; + } any; + + typedef any bit_map; /* a bit_map is a group of packed bits */ + + typedef unsigned long data_tag; + +#define DT_PDUType (data_tag)1 +#define DT_ReferenceID (data_tag)2 +#define DT_ProtocolVersion (data_tag)3 +#define DT_Options (data_tag)4 +#define DT_PreferredMessageSize (data_tag)5 +#define DT_MaximumRecordSize (data_tag)6 +#define DT_IDAuthentication (data_tag)7 +#define DT_ImplementationID (data_tag)8 +#define DT_ImplementationName (data_tag)9 
+#define DT_ImplementationVersion (data_tag)10 +#define DT_UserInformationField (data_tag)11 +#define DT_Result (data_tag)12 +#define DT_SmallSetUpperBound (data_tag)13 +#define DT_LargeSetLowerBound (data_tag)14 +#define DT_MediumSetPresentNumber (data_tag)15 +#define DT_ReplaceIndicator (data_tag)16 +#define DT_ResultSetName (data_tag)17 +#define DT_DatabaseNames (data_tag)18 +#define DT_ElementSetNames (data_tag)19 +#define DT_QueryType (data_tag)20 +#define DT_Query (data_tag)21 +#define DT_SearchStatus (data_tag)22 +#define DT_ResultCount (data_tag)23 +#define DT_NumberOfRecordsReturned (data_tag)24 +#define DT_NextResultSetPosition (data_tag)25 +#define DT_ResultSetStatus (data_tag)26 +#define DT_PresentStatus (data_tag)27 +#define DT_DatabaseDiagnosticRecords (data_tag)28 +#define DT_NumberOfRecordsRequested (data_tag)29 +#define DT_ResultSetStartPosition (data_tag)30 +#define DT_ResultSetID (data_tag)31 +#define DT_DeleteOperation (data_tag)32 +#define DT_DeleteStatus (data_tag)33 +#define DT_NumberNotDeleted (data_tag)34 +#define DT_BulkStatuses (data_tag)35 +#define DT_DeleteMSG (data_tag)36 +#define DT_SecurityChallenge (data_tag)37 +#define DT_SecurityChallengeResponse (data_tag)38 +#define DT_SuspendedFlag (data_tag)39 +#define DT_ResourceReport (data_tag)40 +#define DT_PartialResultsAvailable (data_tag)41 +#define DT_ContinueFlag (data_tag)42 +#define DT_ResultSetWanted (data_tag)43 + +#define UNUSED -1 + +/* number of bytes required to represent the following sizes in compressed + integer format + */ +#define CompressedInt1Byte 128 /* 2 ^ 7 */ +#define CompressedInt2Byte 16384 /* 2 ^ 14 */ +#define CompressedInt3Byte 2097152 /* 2 ^ 21 */ +/* others may follow ... 
*/ + +/* types of query */ +#define QT_0 "0" /* query whose non-standard format has been agreed upon + client and server */ +/* values for InitAPDU option element */ +#define WILL_USE TRUE +#define WILL_NOT_USE FALSE +#define WILL_SUPPORT TRUE +#define WILL_NOT_SUPPORT FALSE + +/* values for InitResponseAPDU result element */ +#define ACCEPT TRUE +#define REJECT FALSE + +/* values for SearchResponseAPDU search status element */ +#define SUCCESS 0 /* intuitive huh? */ +#define FAILURE 1 + +/* values for SearchResponseAPDU result set status element */ +#define SUBSET 1 +#define INTERIM 2 +#define NONE 3 + +/* values for SearchResponseAPDU present status element */ +/* SUCCESS already defined */ +#define PARTIAL_1 1 +#define PARTIAL_2 2 +#define PARTIAL_3 3 +#define PARTIAL_4 4 +#define PS_NONE 5 /* can't use NONE since it was used by result + set status */ + +#define DIAGNOSTIC_CODE_SIZE (size_t)3 + + typedef struct diagnosticRecord { + boolean SURROGATE; + char DIAG[DIAGNOSTIC_CODE_SIZE]; + char *ADDINFO; + } diagnosticRecord; + +#define D_PermanentSystemError "S1" +#define D_TemporarySystemError "S2" +#define D_UnsupportedSearch "S3" +#define D_TermsOnlyStopWords "S5" +#define D_TooManyArgumentWords "S6" +#define D_TooManyBooleanOperators "S7" +#define D_TooManyTruncatedWords "S8" +#define D_TooMany IncompleteSubfields "S9" +#define D_TruncatedWordsTooShort "SA" +#define D_InvalidFormatForRecordNumber "SB" +#define D_TooManyCharactersInSearch "SC" +#define D_TooManyRecordsRetrieved "SD" +#define D_PresentRequestOutOfRange "SF" +#define D_SystemErrorInPresentRecords "SG" +#define D_RecordNotAuthorizedToBeSent "SH" +#define D_RecordExceedsPrefMessageSize "SI" +#define D_RecordExceedsMaxRecordSize "SJ" +#define D_ResultSetNotSuppAsSearchTerm "SK" +#define D_OnlyOneRsltSetAsSrchTermSupp "SL" +#define D_OnlyANDingOfASnglRsltSetSupp "SM" +#define D_RsltSetExistsNoReplace "SN" +#define D_ResultSetNamingNotSupported "SO" +#define D_CombinationDatabasesNotSupp "SP" +#define 
D_ElementSetNamesNotSupported "SQ" +#define D_ElementSetNameNotValid "SR" +#define D_OnlyASingleElmntSetNameSupp "SS" +#define D_ResultSetDeletedByTarget "ST" +#define D_ResultSetIsInUse "SU" +#define D_DatabasesIsLocked "SV" +#define D_TerminatedByNoContinueResp "SW" +#define D_ResultSetDoesNotExist "SX" +#define D_ResExNoResultsAvailable "SY" +#define D_ResExUnpredictableResults "SZ" +#define D_ResExValidSubsetOfResults "T1" +#define D_AccessControlFailure "T2" +#define D_SecurityNotIssuedReqTerm "T3" +#define D_SecurityNotBeIssuedRecNotInc "T4" + +/*----------------------------------------------------------------------*/ + +/* for internal error handling */ + + extern char *readErrorPosition; /* pos where buf stoped making sense */ + +/* the following are macros so that they can return OUT of the function + which calls them + */ + +#define RETURN_ON_NULL(var) \ + if (var == NULL) \ + return(NULL); /* jump out of caller */ + +#define REPORT_READ_ERROR(pos) \ + { readErrorPosition = (pos); \ + return(NULL); /* jump out of caller */ \ + } + +#define CHECK_FOR_SPACE_LEFT(spaceNeeded,spaceLeft) \ + { if (*spaceLeft >= spaceNeeded) \ + (*spaceLeft) -= spaceNeeded; \ + else \ + { *spaceLeft = 0; \ + return(NULL); /* jump out of the caller */ \ + } \ + } + +/*----------------------------------------------------------------------*/ + + diagnosticRecord *makeDiag(boolean surrogate, char *code, char *addInfo); + void freeDiag(diagnosticRecord * diag); + char *writeDiag(diagnosticRecord * diag, char *buffer, long *len); + char *readDiag(diagnosticRecord ** diag, char *buffer); + + char *writeCompressedInteger(unsigned long num, char *buf, long *len); + char *readCompressedInteger(unsigned long *num, char *buf); + char *writeCompressedIntWithPadding(unsigned long num, unsigned long size, + char *buffer, long *len); + unsigned long writtenCompressedIntSize(unsigned long num); + + char *writeTag(data_tag tag, char *buf, long *len); + char *readTag(data_tag *tag, char *buf); + 
data_tag peekTag(char *buf); + unsigned long writtenTagSize(data_tag tag); + + any *makeAny(unsigned long size, char *data); + void freeAny(any *a); + any *duplicateAny(any *a); + char *writeAny(any *a, data_tag tag, char *buffer, long *len); + char *readAny(any **anAny, char *buffer); + unsigned long writtenAnySize(data_tag tag, any *a); + + any *stringToAny(char *s); + char *anyToString(any *a); + unsigned long writtenStringSize(data_tag tag, char *s); + + any *longToAny(long Num); + long anyToLong(any *a); + + char *writeString(char *s, data_tag tag, char *buffer, long *len); + char *readString(char **s, char *buffer); + + bit_map *makeBitMap(unsigned long numBits,...); + + void freeBitMap(bit_map *bm); + boolean bitAtPos(unsigned long pos, bit_map *bm); + char *writeBitMap(bit_map *bm, data_tag tag, char *buffer, long *len); + char *readBitMap(bit_map **bm, char *buffer); + + char *writeByte(unsigned long byte, char *buf, long *len); + char *readByte(unsigned char *byte, char *buf); + + char *writeBoolean(boolean flag, char *buf, long *len); + char *readBoolean(boolean *flag, char *buf); + + char *writePDUType(pdu_type pduType, char *buf, long *len); + char *readPDUType(pdu_type *pduType, char *buf); + pdu_type peekPDUType(char *buf); + + char *writeBinaryInteger(long num, unsigned long size, + char *buf, long *len); + char *readBinaryInteger(long *num, unsigned long size, char *buf); + unsigned long writtenCompressedBinIntSize(long num); + + char *writeNum(long num, data_tag tag, char *buffer, long *len); + char *readNum(long *num, char *buffer); + unsigned long writtenNumSize(data_tag tag, long num); + + void doList(void **list, void (*func) (void *)); + + char *writeProtocolVersion(char *buf, long *len); + char *defaultImplementationID(void); + char *defaultImplementationName(void); + char *defaultImplementationVersion(void); + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from ZType1.c -- FM + * + 
*----------------------------------------------------------------------*/ + +/* This file implements the type 1 query defined in appendices B & C + of the SR 1 spec. + */ + +/*----------------------------------------------------------------------*/ +/* types and constants */ + +/* new data tags */ +#define DT_AttributeList (data_tag)44 +#define DT_Term (data_tag)45 +#define DT_Operator (data_tag)46 + +#define QT_BooleanQuery "1" /* standard boolean query */ + +/* general attribute code - use in place of any attribute */ +#define IGNORE "ig" + +/* use value codes */ +#define UV_ISBN "ub" +#define CORPORATE_NAME "uc" +#define ISSN "us" +#define PERSONAL_NAME "up" +#define SUBJECT "uj" +#define TITLE "ut" +#define GEOGRAPHIC_NAME "ug" +#define CODEN "ud" +#define SUBJECT_SUBDIVISION "ue" +#define SERIES_TITLE "uf" +#define MICROFORM_GENERATION "uh" +#define PLACE_OF_PUBLICATION "ui" +#define NUC_CODE "uk" +#define LANGUAGE "ul" +#define COMBINATION_OF_USE_VALUES "um" +#define SYSTEM_CONTROL_NUMBER "un" +#define DATE "uo" +#define LC_CONTROL_NUMBER "ur" +#define MUSIC_PUBLISHERS_NUMBER "uu" +#define GOVERNMENT_DOCUMENTS_NUMBER "uv" +#define SUBJECT_CLASSIFICATION "uw" +#define RECORD_TYPE "uy" + +/* relation value codes */ +#define EQUAL "re" +#define GREATER_THAN "rg" +#define GREATER_THAN_OR_EQUAL "ro" +#define LESS_THAN "rl" +#define LESS_THAN_OR_EQUAL "rp" +#define NOT_EQUAL "rn" + +/* position value codes */ +#define FIRST_IN_FIELD "pf" +#define FIRST_IN_SUBFIELD "ps" +#define FIRST_IN_A_SUBFIELD "pa" +#define FIRST_IN_NOT_A_SUBFIELD "pt" +#define ANY_POSITION_IN_FIELD "py" + +/* structure value codes */ +#define PHRASE "sp" +#define WORD "sw" +#define KEY "sk" +#define WORD_LIST "sl" + +/* truncation value codes */ +#define NO_TRUNCATION "tn" +#define RIGHT_TRUNCATION "tr" +#define PROC_NUM_INCLUDED_IN_SEARCH_ARG "ti" + +/* completeness value codes */ +#define INCOMPLETE_SUBFIELD "ci" +#define COMPLETE_SUBFIELD "cs" +#define COMPLETEFIELD "cf" + +/* operator 
codes */ +#define AND "a" +#define OR "o" +#define AND_NOT "n" + +/* term types */ +#define TT_Attribute 1 +#define TT_ResultSetID 2 +#define TT_Operator 3 + +#define ATTRIBUTE_SIZE 3 +#define OPERATOR_SIZE 2 + + typedef struct query_term { + /* type */ + long TermType; + /* for term */ + char Use[ATTRIBUTE_SIZE]; + char Relation[ATTRIBUTE_SIZE]; + char Position[ATTRIBUTE_SIZE]; + char Structure[ATTRIBUTE_SIZE]; + char Truncation[ATTRIBUTE_SIZE]; + char Completeness[ATTRIBUTE_SIZE]; + any *Term; + /* for result set */ + any *ResultSetID; + /* for operator */ + char Operator[OPERATOR_SIZE]; + } query_term; + +/*----------------------------------------------------------------------*/ +/* functions */ + + query_term *makeAttributeTerm(char *use, char *relation, char *position, char *structure, + char *truncation, char *completeness, any *term); + query_term *makeResultSetTerm(any *resultSet); + query_term *makeOperatorTerm(char *operatorCode); + void freeTerm(void *qt); + char *writeQueryTerm(query_term *qt, char *buffer, long *len); + char *readQueryTerm(query_term **qt, char *buffer); + any *writeQuery(query_term **terms); + query_term **readQuery(any *info); + +/*----------------------------------------------------------------------*/ + +/* + * Routines originally from UI.c -- FM + * + *----------------------------------------------------------------------*/ + + char *generate_retrieval_apdu(char *buff, + long *buff_len, + any *docID, + long chunk_type, + long start_line, long end_line, + char *type, + char *database_name); + + long interpret_message(char *request_message, + long request_length, + char *response_message, + long response_buffer_length, /* length of the buffer (modified) */ + long connection, + boolean verbose); + + char *trim_junk(char *headline); + +/* + * Routines originally from ZProt.c -- FM + * + *----------------------------------------------------------------------*/ + +/* APDU types */ + + typedef struct InitAPDU { + pdu_type PDUType; + 
boolean willSearch, willPresent, willDelete; + boolean supportAccessControl, supportResourceControl; + long PreferredMessageSize; + long MaximumRecordSize; + char *IDAuthentication; + char *ImplementationID; + char *ImplementationName; + char *ImplementationVersion; + any *ReferenceID; + void *UserInformationField; + } InitAPDU; + + typedef struct InitResponseAPDU { + pdu_type PDUType; + boolean Result; + boolean willSearch, willPresent, willDelete; + boolean supportAccessControl, supportResourceControl; + long PreferredMessageSize; + long MaximumRecordSize; + char *IDAuthentication; + char *ImplementationID; + char *ImplementationName; + char *ImplementationVersion; + any *ReferenceID; + void *UserInformationField; + } InitResponseAPDU; + + typedef struct SearchAPDU { + pdu_type PDUType; + long SmallSetUpperBound; + long LargeSetLowerBound; + long MediumSetPresentNumber; + boolean ReplaceIndicator; + char *ResultSetName; + char **DatabaseNames; + char *QueryType; + char **ElementSetNames; + any *ReferenceID; + void *Query; + } SearchAPDU; + + typedef struct SearchResponseAPDU { + pdu_type PDUType; + long SearchStatus; + long ResultCount; + long NumberOfRecordsReturned; + long NextResultSetPosition; + long ResultSetStatus; + long PresentStatus; + any *ReferenceID; + void *DatabaseDiagnosticRecords; + } SearchResponseAPDU; + + typedef struct PresentAPDU { + pdu_type PDUType; + long NumberOfRecordsRequested; + long ResultSetStartPosition; + char *ResultSetID; + char *ElementSetNames; + any *ReferenceID; + void *PresentInfo; + } PresentAPDU; + + typedef struct PresentResponseAPDU { + pdu_type PDUType; + boolean PresentStatus; + long NumberOfRecordsReturned; + long NextResultSetPosition; + any *ReferenceID; + void *DatabaseDiagnosticRecords; + } PresentResponseAPDU; + +/*----------------------------------------------------------------------*/ +/* Functions */ + + InitAPDU *makeInitAPDU(boolean search, boolean present, boolean deleteIt, + boolean accessControl, boolean 
resourceControl, long prefMsgSize, + long maxMsgSize, + char *auth, + char *id, + char *name, + char *version, + any *refID, + void *userInfo); + void freeInitAPDU(InitAPDU * init); + char *writeInitAPDU(InitAPDU * init, char *buffer, long *len); + char *readInitAPDU(InitAPDU ** init, char *buffer); + + InitResponseAPDU *makeInitResponseAPDU(boolean result, + boolean search, + boolean present, + boolean deleteIt, + boolean accessControl, + boolean resourceControl, + long prefMsgSize, + long maxMsgSize, + char *auth, + char *id, + char *name, + char *version, + any *refID, + void *userInfo); + void freeInitResponseAPDU(InitResponseAPDU *init); + char *writeInitResponseAPDU(InitResponseAPDU *init, char *buffer, long *len); + char *readInitResponseAPDU(InitResponseAPDU **init, char *buffer); + InitResponseAPDU *replyToInitAPDU(InitAPDU * init, boolean result, void *userInfo); + + SearchAPDU *makeSearchAPDU(long small, long large, long medium, + boolean replace, char *name, char **databases, + char *type, char **elements, any *refID, void *queryInfo); + void freeSearchAPDU(SearchAPDU *query); + char *writeSearchAPDU(SearchAPDU *query, char *buffer, long *len); + char *readSearchAPDU(SearchAPDU **query, char *buffer); + + SearchResponseAPDU *makeSearchResponseAPDU(long result, long count, + long recordsReturned, long nextPos, + long resultStatus, long presentStatus, + any *refID, void *records); + void freeSearchResponseAPDU(SearchResponseAPDU *queryResponse); + char *writeSearchResponseAPDU(SearchResponseAPDU *queryResponse, char + *buffer, long *len); + char *readSearchResponseAPDU(SearchResponseAPDU **queryResponse, char *buffer); + + PresentAPDU *makePresentAPDU(long recsReq, long startPos, + char *resultID, any *refID, void *info); + void freePresentAPDU(PresentAPDU * present); + char *writePresentAPDU(PresentAPDU * present, char *buffer, long *len); + char *readPresentAPDU(PresentAPDU ** present, char *buffer); + + PresentResponseAPDU 
*makePresentResponseAPDU(boolean status, long recsRet, + long nextPos, any *refID, + void *records); + void freePresentResponseAPDU(PresentResponseAPDU * present); + char *writePresentResponseAPDU(PresentResponseAPDU * present, char + *buffer, long *len); + char *readPresentResponseAPDU(PresentResponseAPDU ** present, char *buffer); + +/*----------------------------------------------------------------------*/ +/* user extension hooks: */ + + extern char *writeInitInfo(InitAPDU * init, char *buffer, long *len); + extern char *readInitInfo(void **info, char *buffer); + + extern char *writeInitResponseInfo(InitResponseAPDU *init, char *buffer, long *len); + extern char *readInitResponseInfo(void **info, char *buffer); + + extern char *writeSearchInfo(SearchAPDU *query, char *buffer, long *len); + extern char *readSearchInfo(void **info, char *buffer); + + extern char *writeSearchResponseInfo(SearchResponseAPDU *query, char + *buffer, long *len); + extern char *readSearchResponseInfo(void **info, char *buffer); + + extern char *writePresentInfo(PresentAPDU * present, char *buffer, long *len); + extern char *readPresentInfo(void **info, char *buffer); + + extern char *writePresentResponseInfo(PresentResponseAPDU * present, char + *buffer, long *len); + extern char *readPresentResponseInfo(void **info, char *buffer); + +#ifdef __cplusplus +} +#endif +#endif /* HTVMSWAIS_H */ diff --git a/WWW/Library/Implementation/HTWAIS.c b/WWW/Library/Implementation/HTWAIS.c new file mode 100644 index 00000000..5c998ea8 --- /dev/null +++ b/WWW/Library/Implementation/HTWAIS.c @@ -0,0 +1,1082 @@ +/* + * $LynxId: HTWAIS.c,v 1.36 2011/06/11 12:13:32 tom Exp $ + * + * WorldWideWeb - Wide Area Informaion Server Access HTWAIS.c + * ================================================== + * + * This module allows a WWW server or client to read data from a + * remote WAIS + * server, and provide that data to a WWW client in hypertext form. 
+ * Source files, once retrieved, are stored and used to provide + * information about the index when that is accessed. + * + * Authors + * BK Brewster Kahle, Thinking Machines, <Brewster@think.com> + * TBL Tim Berners-Lee, CERN <timbl@info.cern.ch> + * FM Foteos Macrides, WFEB <macrides@sci.wfeb.edu> + * + * History + * Sep 91 TBL adapted shell-ui.c (BK) with HTRetrieve.c from WWW. + * Feb 91 TBL Generated HTML cleaned up a bit (quotes, escaping) + * Refers to lists of sources. + * Mar 93 TBL Lib 2.0 compatible module made. + * May 94 FM Added DIRECT_WAIS support for VMS. + * + * Bugs + * Uses C stream i/o to read and write sockets, which won't work + * on VMS TCP systems. + * + * Should cache connections. + * + * ANSI C only as written + * + * Bugs fixed + * NT Nathan Torkington (Nathan.Torkington@vuw.ac.nz) + * + * WAIS comments: + * + * 1. Separate directories for different system's .o would help + * 2. Document ids are rather long! + * + * WWW Address mapping convention: + * + * /servername/database/type/length/document-id + * + * /servername/database?word+word+word + */ +/* WIDE AREA INFORMATION SERVER SOFTWARE: + No guarantees or restrictions. See the readme file for the full standard + disclaimer. 
+ + Brewster@think.com +*/ + +#include <HTUtils.h> +#include <HTParse.h> +#include <HTAccess.h> /* We implement a protocol */ +#include <HTML.h> /* The object we will generate */ +#include <HTWSRC.h> +#include <HTTCP.h> +#include <HTCJK.h> +#include <HTAlert.h> +#include <LYStrings.h> + +#undef lines /* term.h conflict with wais.h */ +#undef alloca /* alloca.h conflict with wais.h */ + +/* From WAIS + * --------- + */ +#ifdef VMS +#include <HTVMS_WaisUI.h> +#include <HTVMS_WaisProt.h> +#elif defined(HAVE_WAIS_H) +#include <wais.h> +#else +#include <ui.h> +#endif /* VMS */ + +#define MAX_MESSAGE_LEN 100000 +#define CHARS_PER_PAGE 10000 /* number of chars retrieved in each request */ + +#define WAISSEARCH_DATE "Fri Jul 19 1991" + +/* FROM WWW + * -------- + */ +#include <LYUtils.h> +#include <LYLeaks.h> + +#define DIRECTORY "/cnidr.org:210/directory-of-servers" +/* #define DIRECTORY "/quake.think.com:210/directory-of-servers" */ + +#define BIG 1024 /* identifier size limit @@@@@ */ + +#define BUFFER_SIZE 4096 /* Arbitrary size for efficiency */ + +#define HEX_ESCAPE '%' + +static BOOL as_gate; /* Client is using us as gateway */ + +static char line[2048]; /* For building strings to display */ + + /* Must be able to take id */ + +#define PUTC(c) (*target->isa->put_character)(target, c) +#define PUTS(s) (*target->isa->put_string)(target, s) +#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0) +#define END(e) (*target->isa->end_element)(target, e, 0) +#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ + (*target->isa->end_element)(target, e, 0) +#define FREE_TARGET (*target->isa->_free)(target) + +struct _HTStructured { + const HTStructuredClass *isa; + /* ... 
*/ +}; + +/* ------------------------------------------------------------------------ */ +/* ---------------- Local copy of connect_to_server calls ----------------- */ +/* ------------------------------------------------------------------------ */ +/* Returns 1 on success, 0 on fail, -1 on interrupt. */ +static int fd_mosaic_connect_to_server(char *host_name, + long port, + long *fd) +{ + char *dummy = NULL; + int status; + int result; + + HTSprintf0(&dummy, "%s//%s:%ld/", STR_WAIS_URL, host_name, port); + + status = HTDoConnect(dummy, "WAIS", 210, (int *) fd); + if (status == HT_INTERRUPTED) { + result = -1; + } else if (status < 0) { + result = 0; + } else { + result = 1; + } + FREE(dummy); + return result; +} + +/* Returns 1 on success, 0 on fail, -1 on interrupt. */ +#ifdef VMS +static int mosaic_connect_to_server(char *host_name, + long port, + long *fdp) +#else +static int mosaic_connect_to_server(char *host_name, + long port, + FILE **fp) +#endif /* VMS */ +{ +#ifndef VMS + FILE *file; +#endif /* VMS */ + long fd; + int rv; + + rv = fd_mosaic_connect_to_server(host_name, port, &fd); + if (rv == 0) { + HTAlert(gettext("Could not connect to WAIS server.")); + return 0; + } else if (rv == -1) { + HTAlert(CONNECTION_INTERRUPTED); + return -1; + } +#ifndef VMS + if ((file = fdopen(fd, "r+")) == NULL) { + HTAlert(gettext("Could not open WAIS connection for reading.")); + return 0; + } + + *fp = file; +#else + *fdp = fd; +#endif /* VMS */ + return 1; +} +/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------------------ */ + +/* showDiags +*/ +/* modified from Jonny G's version in ui/question.c */ +static void showDiags(HTStream *target, diagnosticRecord ** d) +{ + long i; + + for (i = 0; d[i] != NULL; i++) { + if (d[i]->ADDINFO != NULL) { + PUTS(gettext("Diagnostic code is ")); + PUTS(d[i]->DIAG); + PUTC(' '); + PUTS(d[i]->ADDINFO); + PUTC('\n'); + } + } +} + +/* Matrix of 
allowed characters in filenames + * ----------------------------------------- + */ + +static BOOL acceptable[256]; +static BOOL acceptable_inited = NO; + +static void init_acceptable(void) +{ + unsigned int i; + char *good = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$"; + + for (i = 0; i < 256; i++) + acceptable[i] = NO; + for (; *good; good++) + acceptable[(unsigned int) *good] = YES; + acceptable_inited = YES; +} + +/* Transform file identifier into WWW address + * ------------------------------------------ + * + * + * On exit, + * returns nil if error + * pointer to malloced string (must be freed) if ok + */ +static char *WWW_from_archie(char *file) +{ + char *end; + char *result; + char *colon; + + for (end = file; *end > ' '; end++) ; /* assumes ASCII encoding */ + result = (char *) malloc(10 + (end - file)); + if (!result) + return result; /* Malloc error */ + strcpy(result, "file://"); + StrNCat(result, file, end - file); + colon = strchr(result + 7, ':'); /* Expect colon after host */ + if (colon) { + for (; colon[0]; colon[0] = colon[1], colon++) ; /* move down */ + } + return result; +} /* WWW_from_archie */ + +/* Transform document identifier into URL + * -------------------------------------- + * + * Bugs: A static buffer of finite size is used! + * The format of the docid MUST be good! + * + * On exit, + * returns nil if error + * pointer to malloced string (must be freed) if ok + */ +static char hex[17] = "0123456789ABCDEF"; + +static char *WWW_from_WAIS(any *docid) +{ + static char buf[BIG]; + char *q = buf; + char *p = (docid->bytes); + char *result = NULL; + int i, l; + + if (TRACE) { + char *p2; + + fprintf(tfp, "WAIS id (%d bytes) is ", (int) docid->size); + for (p2 = docid->bytes; p2 < docid->bytes + docid->size; p2++) { + if ((*p2 >= ' ') && (*p2 <= '~')) /* Assume ASCII! 
*/ + fprintf(tfp, "%c", *p2); + else + fprintf(tfp, "<%x>", (unsigned) *p2); + } + fprintf(tfp, "\n"); + } + for (p = docid->bytes; + (p < docid->bytes + docid->size) && (q < &buf[BIG]);) { + CTRACE((tfp, " Record type %d, length %d\n", p[0], p[1])); + if (*p > 10) { + CTRACE((tfp, "Eh? DOCID record type of %d!\n", *p)); + return 0; + } { /* Bug fix -- allow any byte value 15 Apr 93 */ + unsigned int i2 = (unsigned) *p++; + + if (i2 > 99) { + *q++ = (i2 / 100) + '0'; + i2 = i2 % 100; + } + if (i2 > 9) { + *q++ = (i2 / 10) + '0'; + i2 = i2 % 10; + } + *q++ = i2 + '0'; /* Record type */ + } + *q++ = '='; /* Separate */ + l = *p++; /* Length */ + for (i = 0; i < l; i++, p++) { + if (!acceptable[UCH(*p)]) { + *q++ = HEX_ESCAPE; /* Means hex coming */ + *q++ = hex[(*p) >> 4]; + *q++ = hex[(*p) & 15]; + } else + *q++ = *p; + } + *q++ = ';'; /* Terminate field */ + } + *q++ = 0; /* Terminate string */ + CTRACE((tfp, "WWW form of id: %s\n", buf)); + StrAllocCopy(result, buf); + return result; +} /* WWW_from_WAIS */ + +/* Transform URL into WAIS document identifier + * ------------------------------------------- + * + * On entry, + * docname points to valid name produced originally by + * WWW_from_WAIS + * On exit, + * docid->size is valid + * docid->bytes is malloced and must later be freed. + */ +static any *WAIS_from_WWW(any *docid, char *docname) +{ + char *z; /* Output pointer */ + char *sor; /* Start of record - points to size field. 
*/ + char *p; /* Input pointer */ + char *q; /* Poisition of "=" */ + char *s; /* Position of semicolon */ + int n; /* size */ + + CTRACE((tfp, "WWW id (to become WAIS id): %s\n", docname)); + for (n = 0, p = docname; *p; p++) { /* Count sizes of strings */ + n++; + if (*p == ';') + n--; /* Not converted */ + else if (*p == HEX_ESCAPE) + n = n - 2; /* Save two bytes */ + docid->size = n; + } + + if (!(docid->bytes = (char *) malloc(docid->size))) /* result record */ + outofmem(__FILE__, "WAIS_from_WWW"); + z = docid->bytes; + + for (p = docname; *p;) { /* Convert of strings */ + /* Record type */ + + *z = 0; /* Initialize record type */ + while (*p >= '0' && *p <= '9') { + *z = *z * 10 + (*p++ - '0'); /* Decode decimal record type */ + } + z++; + if (*p != '=') + return 0; + q = p; + +/* *z++ = *p++ - '0'; + q = strchr(p , '='); + if (!q) return 0; +*/ + s = strchr(q, ';'); /* (Check only) */ + if (!s) + return 0; /* Bad! No ';'; */ + sor = z; /* Remember where the size field was */ + z++; /* Skip record size for now */ + for (p = q + 1; *p != ';';) { + if (*p == HEX_ESCAPE) { + char c; + unsigned int b; + + p++; + c = *p++; + b = from_hex(c); + c = *p++; + if (!c) + break; /* Odd number of chars! */ + *z++ = (b << 4) + from_hex(c); + } else { + *z++ = *p++; /* Record */ + } + } + *sor = (z - sor - 1); /* Fill in size -- not counting size itself */ + p++; /* After semicolon: start of next record */ + } + + if (TRACE) { + char *p2; + + fprintf(tfp, "WAIS id (%d bytes) is ", (int) docid->size); + for (p2 = docid->bytes; p2 < docid->bytes + docid->size; p2++) { + if ((*p2 >= ' ') && (*p2 <= '~')) /* Assume ASCII! 
*/ + fprintf(tfp, "%c", *p2); + else + fprintf(tfp, "<%x>", (unsigned) *p2); + } + fprintf(tfp, "\n"); + } + return docid; /* Ok */ + +} /* WAIS_from_WWW */ + +/* Send a plain text record to the client output_text_record() + * -------------------------------------- + */ +static void output_text_record(HTStream *target, + WAISDocumentText *record, + boolean binary) +{ + unsigned long count; + + /* printf(" Text\n"); + print_any(" DocumentID: ", record->DocumentID); + printf(" VersionNumber: %d\n", record->VersionNumber); + */ + + if (binary) { + (*target->isa->put_block) (target, + record->DocumentText->bytes, + record->DocumentText->size); + return; + } + + for (count = 0; count < record->DocumentText->size; count++) { + long ch = (unsigned char) record->DocumentText->bytes[count]; + + if (ch == 27) { /* What is this in for? Tim */ + /* then we have an escape code */ + /* if the next letter is '(' or ')', then ignore two letters */ + if ('(' == record->DocumentText->bytes[count + 1] || + ')' == record->DocumentText->bytes[count + 1]) + count += 1; /* it is a term marker */ + else + count += 4; /* it is a paragraph marker */ + } else if (ch == '\n' || ch == '\r') { + PUTC('\n'); + } else if (IS_CJK_TTY || ch == '\t' || isprint(ch)) { + PUTC(ch); + } + } +} /* output text record */ + +/* Format A Search response for the client display_search_response + * --------------------------------------- + */ +/* modified from tracy shen's version in wutil.c + * displays either a text record or a set of headlines. 
+ */ +static void display_search_response(HTStructured * target, SearchResponseAPDU *response, + char *the_database, + char *keywords) +{ + WAISSearchResponse *info; + long i, k; + + BOOL archie = strstr(the_database, "archie") != 0; /* Special handling */ + + CTRACE((tfp, "HTWAIS: Displaying search response\n")); + PUTS(gettext("Index ")); + START(HTML_EM); + PUTS(the_database); + END(HTML_EM); + sprintf(line, gettext(" contains the following %d item%s relevant to \""), + (int) (response->NumberOfRecordsReturned), + response->NumberOfRecordsReturned == 1 ? "" : "s"); + PUTS(line); + START(HTML_EM); + PUTS(keywords); + END(HTML_EM); + PUTS("\".\n"); + PUTS(gettext("The first figure after each entry is its relative score, ")); + PUTS(gettext("the second is the number of lines in the item.")); + START(HTML_BR); + START(HTML_BR); + PUTC('\n'); + START(HTML_OL); + + if (response->DatabaseDiagnosticRecords != 0) { + info = (WAISSearchResponse *) response->DatabaseDiagnosticRecords; + i = 0; + + if (info->Diagnostics != NULL) + showDiags((HTStream *) target, info->Diagnostics); + + if (info->DocHeaders != 0) { + for (k = 0; info->DocHeaders[k] != 0; k++) { + WAISDocumentHeader *head = info->DocHeaders[k]; + char *headline = trim_junk(head->Headline); + any *docid = head->DocumentID; + char *docname; /* printable version of docid */ + + i++; + /* + * Make a printable string out of the document id. 
+ */ + CTRACE((tfp, "HTWAIS: %2ld: Score: %4ld, lines:%4ld '%s'\n", + i, + (long int) (info->DocHeaders[k]->Score), + (long int) (info->DocHeaders[k]->Lines), + headline)); + + START(HTML_LI); + + if (archie) { + char *www_name = WWW_from_archie(headline); + + if (www_name) { + HTStartAnchor(target, NULL, www_name); + PUTS(headline); + END(HTML_A); + FREE(www_name); + } else { + PUTS(headline); + PUTS(gettext(" (bad file name)")); + } + } else { /* Not archie */ + docname = WWW_from_WAIS(docid); + if (docname) { + if ((head->Types) && + (!strcmp(head->Types[0], "URL"))) { + HTStartAnchor(target, NULL, headline); + } else { + char *dbname = HTEscape(the_database, URL_XPALPHAS); + char *w3_address = NULL; + + HTSprintf0(&w3_address, + "/%s/%s/%d/%s", + dbname, + head->Types ? head->Types[0] : "TEXT", + (int) (head->DocumentLength), + docname); + HTStartAnchor(target, NULL, w3_address); + FREE(w3_address); + FREE(dbname); + } + PUTS(headline); + END(HTML_A); + FREE(docname); + } else { + PUTS(gettext("(bad doc id)")); + } + } + + sprintf(line, "%5ld %5ld ", + head->Score, + head->Lines); + PUTS(line); + MAYBE_END(HTML_LI); + } /* next document header */ + } + /* if there were any document headers */ + if (info->ShortHeaders != 0) { + k = 0; + while (info->ShortHeaders[k] != 0) { + i++; + PUTS(gettext("(Short Header record, can't display)")); + } + } + if (info->LongHeaders != 0) { + k = 0; + while (info->LongHeaders[k] != 0) { + i++; + PUTS(gettext("\nLong Header record, can't display\n")); + } + } + if (info->Text != 0) { + k = 0; + while (info->Text[k] != 0) { + i++; + PUTS(gettext("\nText record\n")); + output_text_record((HTStream *) target, + info->Text[k++], false); + } + } + if (info->Headlines != 0) { + k = 0; + while (info->Headlines[k] != 0) { + i++; + PUTS(gettext("\nHeadline record, can't display\n")); + /* dsply_headline_record( info->Headlines[k++]); */ + } + } + if (info->Codes != 0) { + k = 0; + while (info->Codes[k] != 0) { + i++; + 
PUTS(gettext("\nCode record, can't display\n")); + /* dsply_code_record( info->Codes[k++]); */ + } + } + } /* Loop: display user info */ + END(HTML_OL); + PUTC('\n'); +} + +/* Load by name HTLoadWAIS + * ============ + * + * This renders any object or search as required. + */ +int HTLoadWAIS(const char *arg, + HTParentAnchor *anAnchor, + HTFormat format_out, + HTStream *sink) +#define MAX_KEYWORDS_LENGTH 1000 +#define MAX_SERVER_LENGTH 1000 +#define MAX_DATABASE_LENGTH 1000 +#define MAX_SERVICE_LENGTH 1000 +#define MAXDOCS 200 + +{ + char *key; /* pointer to keywords in URL */ + char *request_message = NULL; /* arbitrary message limit */ + char *response_message = NULL; /* arbitrary message limit */ + long request_buffer_length; /* how of the request is left */ + SearchResponseAPDU *retrieval_response = 0; + char keywords[MAX_KEYWORDS_LENGTH + 1]; + char *the_server_name; + char *wais_database = NULL; /* name of current database */ + char *www_database; /* Same name escaped */ + char *service; + char *doctype; + char *doclength; + long document_length = 0; + char *docname = 0; + +#ifdef VMS + long connection = 0; + +#else + FILE *connection = NULL; +#endif /* VMS */ + char *names; /* Copy of arg to be hacked up */ + BOOL ok = NO; + int return_status = HT_LOADED; + int rv; + + if (!acceptable_inited) + init_acceptable(); + + /* Decipher and check syntax of WWW address: + * ---------------------------------------- + * + * First we remove the "wais:" if it was specified. 
920110 + */ + names = HTParse(arg, "", PARSE_HOST | PARSE_PATH | PARSE_PUNCTUATION); + key = strchr(names, '?'); + + if (key) { + char *p; + + *key++ = 0; /* Split off keywords */ + for (p = key; *p; p++) + if (*p == '+') + *p = ' '; + HTUnEscape(key); + } + if (names[0] == '/') { + the_server_name = names + 1; + if ((as_gate = (*the_server_name == '/')) != 0) + the_server_name++; /* Accept one or two */ + www_database = strchr(the_server_name, '/'); + if (www_database) { + *www_database++ = 0; /* Separate database name */ + doctype = strchr(www_database, '/'); + if (key) + ok = YES; /* Don't need doc details */ + else if (doctype) { /* If not search parse doc details */ + *doctype++ = 0; /* Separate rest of doc address */ + doclength = strchr(doctype, '/'); + if (doclength) { + *doclength++ = 0; + document_length = atol(doclength); + if (document_length) { + docname = strchr(doclength, '/'); + if (docname) { + *docname++ = 0; + ok = YES; /* To avoid a goto! */ + } /* if docname */ + } /* if document_length valid */ + } /* if doclength */ + } else { /* no doctype? 
Assume index required */ + if (!key) + key = ""; + ok = YES; + } /* if doctype */ + } /* if database */ + } + + if (!ok) + return HTLoadError(sink, 500, gettext("Syntax error in WAIS URL")); + + CTRACE((tfp, "HTWAIS: Parsed OK\n")); + + service = strchr(names, ':'); + if (service) + *service++ = 0; + else + service = "210"; + + if (the_server_name[0] == 0) { +#ifdef VMS + connection = 0; +#else + connection = NULL; +#endif /* VMS */ + + } else if (!(key && !*key)) { + int status; + + CTRACE((tfp, "===WAIS=== calling mosaic_connect_to_server\n")); + status = mosaic_connect_to_server(the_server_name, + atoi(service), + &connection); + if (status == 0) { + CTRACE((tfp, "===WAIS=== connection failed\n")); + FREE(names); + return HT_NOT_LOADED; + } else if (status == -1) { + CTRACE((tfp, "===WAIS=== connection interrupted\n")); + FREE(names); + return HT_NOT_LOADED; + } + } + + StrAllocCopy(wais_database, www_database); + HTUnEscape(wais_database); + + /* + * This below fixed size stuff is terrible. + */ +#ifdef VMS + if ((request_message = typecallocn(char, MAX_MESSAGE_LEN)) == 0) + outofmem(__FILE__, "HTLoadWAIS"); + if ((response_message = typecallocn(char, MAX_MESSAGE_LEN)) == 0) + outofmem(__FILE__, "HTLoadWAIS"); + +#else + request_message = (char *) s_malloc((size_t) MAX_MESSAGE_LEN * sizeof(char)); + response_message = (char *) s_malloc((size_t) MAX_MESSAGE_LEN * sizeof(char)); +#endif /* VMS */ + + /* + * If keyword search is performed but there are no keywords, the user has + * followed a link to the index itself. It would be appropriate at this + * point to send him the .SRC file - how? 
+ */ + if (key && !*key) { /* I N D E X */ +#ifdef CACHE_FILE_PREFIX + char *filename = NULL; + FILE *fp; +#endif + HTStructured *target = HTML_new(anAnchor, format_out, sink); + + START(HTML_HEAD); + PUTC('\n'); + HTStartIsIndex(target, HTWAIS_SOLICIT_QUERY, NULL); + PUTC('\n'); + + { + START(HTML_TITLE); + PUTS(wais_database); + PUTS(gettext(" (WAIS Index)")); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + + START(HTML_H1); + PUTS(gettext("WAIS Index: ")); + START(HTML_EM); + PUTS(wais_database); + END(HTML_EM); + END(HTML_H1); + PUTC('\n'); + PUTS(gettext("This is a link for searching the ")); + START(HTML_EM); + PUTS(wais_database); + END(HTML_EM); + PUTS(gettext(" WAIS Index.\n")); + + } + /* + * If we have seen a source file for this database, use that. + */ +#ifdef CACHE_FILE_PREFIX + HTSprintf0(&filename, "%sWSRC-%s:%s:%.100s.txt", + CACHE_FILE_PREFIX, + the_server_name, service, www_database); + + fp = fopen(filename, "r"); /* Have we found this already? */ + CTRACE((tfp, "HTWAIS: Description of server %s %s.\n", + filename, + fp ? "exists already" : "does NOT exist!")); + + if (fp) { + char c; + + START(HTML_PRE); /* Preformatted description */ + PUTC('\n'); + while ((c = getc(fp)) != EOF) + PUTC(c); /* Transfer file */ + END(HTML_PRE); + fclose(fp); + } + FREE(filename); +#endif + START(HTML_P); + PUTS(gettext("\nEnter the 's'earch command and then specify search words.\n")); + + FREE_TARGET; + } else if (key) { /* S E A R C H */ + char *p; + HTStructured *target; + + StrNCpy(keywords, key, MAX_KEYWORDS_LENGTH); + while ((p = strchr(keywords, '+')) != 0) + *p = ' '; + + /* + * Send advance title to get something fast to the other end. 
+ */ + target = HTML_new(anAnchor, format_out, sink); + + START(HTML_HEAD); + PUTC('\n'); + HTStartIsIndex(target, HTWAIS_SOLICIT_QUERY, NULL); + PUTC('\n'); + START(HTML_TITLE); + PUTS(keywords); + PUTS(gettext(" (in ")); + PUTS(wais_database); + PUTC(')'); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + PUTC('\n'); + + START(HTML_H1); + PUTS(gettext("WAIS Search of \"")); + START(HTML_EM); + PUTS(keywords); + END(HTML_EM); + PUTS(gettext("\" in: ")); + START(HTML_EM); + PUTS(wais_database); + END(HTML_EM); + END(HTML_H1); + PUTC('\n'); + + request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */ + CTRACE((tfp, "HTWAIS: Search for `%s' in `%s'\n", + keywords, wais_database)); + if (NULL == + generate_search_apdu(request_message + HEADER_LENGTH, + &request_buffer_length, + keywords, wais_database, NULL, MAXDOCS)) { +#ifdef VMS + HTAlert(gettext("HTWAIS: Request too large.")); + return_status = HT_NOT_LOADED; + FREE_TARGET; + goto CleanUp; +#else + panic("request too large"); +#endif /* VMS */ + } + + HTProgress(gettext("Searching WAIS database...")); + rv = interpret_message(request_message, + MAX_MESSAGE_LEN - request_buffer_length, + response_message, + MAX_MESSAGE_LEN, + connection, + false /* true verbose */ + ); + + if (rv == HT_INTERRUPTED) { + HTAlert(gettext("Search interrupted.")); + return_status = HT_INTERRUPTED; + FREE_TARGET; + goto CleanUp; + } else if (!rv) { +#ifdef VMS + HTAlert(HTWAIS_MESSAGE_TOO_BIG); + return_status = HT_NOT_LOADED; + FREE_TARGET; + goto CleanUp; +#else + panic("returned message too large"); +#endif /* VMS */ + } else { /* returned message ok */ + SearchResponseAPDU *query_response = 0; + + readSearchResponseAPDU(&query_response, + response_message + HEADER_LENGTH); + display_search_response(target, + query_response, wais_database, keywords); + if (query_response->DatabaseDiagnosticRecords) + freeWAISSearchResponse(query_response->DatabaseDiagnosticRecords); + freeSearchResponseAPDU(query_response); + } /* returned 
message not too large */ + FREE_TARGET; + } else { /* D O C U M E N T F E T C H */ + HTFormat format_in; + boolean binary; /* how to transfer stuff coming over */ + HTStream *target; + long count; + any doc_chunk; + any *docid = &doc_chunk; + + CTRACE((tfp, + "HTWAIS: Retrieve document id `%s' type `%s' length %ld\n", + NonNull(docname), doctype, document_length)); + + format_in = + !strcmp(doctype, "WSRC") ? HTAtom_for("application/x-wais-source") : + !strcmp(doctype, "TEXT") ? HTAtom_for("text/plain") : + !strcmp(doctype, "HTML") ? HTAtom_for("text/html") : + !strcmp(doctype, "GIF") ? HTAtom_for("image/gif") : + HTAtom_for("application/octet-stream"); + binary = + 0 != strcmp(doctype, "WSRC") && + 0 != strcmp(doctype, "TEXT") && + 0 != strcmp(doctype, "HTML"); + + target = HTStreamStack(format_in, format_out, sink, anAnchor); + if (!target) + return HTLoadError(sink, 500, + gettext("Can't convert format of WAIS document")); + /* + * Decode hex or litteral format for document ID. + */ + WAIS_from_WWW(docid, docname); + + /* + * Loop over slices of the document. + */ + for (count = 0; + count * CHARS_PER_PAGE < document_length; + count++) { +#ifdef VMS + char *type = NULL; + + StrAllocCopy(type, doctype); +#else + char *type = s_strdup(doctype); /* Gets freed I guess */ +#endif /* VMS */ + request_buffer_length = MAX_MESSAGE_LEN; /* Amount left */ + CTRACE((tfp, "HTWAIS: Slice number %ld\n", count)); + + if (HTCheckForInterrupt()) { + HTAlert(TRANSFER_INTERRUPTED); + (*target->isa->_abort) (target, NULL); +#ifdef VMS + FREE(type); +#endif /* VMS */ + return_status = HT_NOT_LOADED; + goto CleanUp; + } + + if (0 == + generate_retrieval_apdu(request_message + HEADER_LENGTH, + &request_buffer_length, + docid, + CT_byte, + count * CHARS_PER_PAGE, + (((count + 1) * CHARS_PER_PAGE <= document_length) + ? 
(count + 1) * CHARS_PER_PAGE + : document_length), + type, + wais_database)) { +#ifdef VMS + HTAlert(gettext("HTWAIS: Request too long.")); + return_status = HT_NOT_LOADED; + FREE_TARGET; + FREE(type); + FREE(docid->bytes); + goto CleanUp; +#else + panic("request too long"); +#endif /* VMS */ + } + + /* + * Actually do the transaction given by request_message. + */ + HTProgress(gettext("Fetching WAIS document...")); + rv = interpret_message(request_message, + MAX_MESSAGE_LEN - request_buffer_length, + response_message, + MAX_MESSAGE_LEN, + connection, + false /* true verbose */ + ); + if (rv == HT_INTERRUPTED) { + HTAlert(TRANSFER_INTERRUPTED); + return_status = HT_INTERRUPTED; + FREE_TARGET; + FREE(type); + FREE(docid->bytes); + goto CleanUp; + } else if (!rv) { +#ifdef VMS + HTAlert(HTWAIS_MESSAGE_TOO_BIG); + return_status = HT_NOT_LOADED; + FREE_TARGET; + FREE(type); + FREE(docid->bytes); + goto CleanUp; +#else + panic("Returned message too large"); +#endif /* VMS */ + } + + /* + * Parse the result which came back into memory. 
+ */ + readSearchResponseAPDU(&retrieval_response, + response_message + HEADER_LENGTH); + + if (NULL == + ((WAISSearchResponse *) + retrieval_response->DatabaseDiagnosticRecords)->Text) { + /* display_search_response(target, retrieval_response, + wais_database, keywords); */ + PUTS(gettext("No text was returned!\n")); + /* panic("No text was returned"); */ + } else { + output_text_record(target, + ((WAISSearchResponse *) + retrieval_response->DatabaseDiagnosticRecords)->Text[0], + binary); + } /* If text existed */ + +#ifdef VMS + FREE(type); +#endif /* VMS */ + } /* Loop over slices */ + + FREE_TARGET; + FREE(docid->bytes); + + freeWAISSearchResponse(retrieval_response->DatabaseDiagnosticRecords); + freeSearchResponseAPDU(retrieval_response); + + } /* If document rather than search */ + + CleanUp: + /* + * (This postponed until later, after a timeout:) + */ +#ifdef VMS + if (connection) + NETCLOSE((int) connection); +#else + if (connection) + fclose(connection); +#endif /* VMS */ + FREE(wais_database); +#ifdef VMS + FREE(request_message); + FREE(response_message); +#else + s_free(request_message); + s_free(response_message); +#endif /* VMS */ + FREE(names); + return (return_status); +} + +#ifdef GLOBALDEF_IS_MACRO +#define _HTWAIS_C_1_INIT { "wais", HTLoadWAIS, NULL } +GLOBALDEF(HTProtocol, HTWAIS, _HTWAIS_C_1_INIT); +#else +GLOBALDEF HTProtocol HTWAIS = +{"wais", HTLoadWAIS, NULL}; +#endif /* GLOBALDEF_IS_MACRO */ diff --git a/WWW/Library/Implementation/HTWAIS.h b/WWW/Library/Implementation/HTWAIS.h new file mode 100644 index 00000000..4ba6f678 --- /dev/null +++ b/WWW/Library/Implementation/HTWAIS.h @@ -0,0 +1,43 @@ +/* WAIS protocol module for the W3 library + WAIS PROTOCOL INTERFACE + + This module does not actually perform the WAIS protocol directly, but it does using one + or more libraries of the freeWAIS distribution. The ui.a library came with the old free + WAIS from TMC, the client.a and wais.a libraries are needed from the freeWAIS from + CNIDR. 
+ + If you include this module in the library, you must also + + Register the HTWAIS protocol at initialisation (e.g., HTInit or HTSInit) by compiling + it with -DDIRECT_WAIS + + Link with the WAIS libraries + + The wais source files are parsed by a separate and independent module, HTWSRC . You + can include HTWSRC without including direct wais using this module, and your WWW code + will be able to read source files, and access WAIS indexes through a gateway. + + A WAIS-WWW gateway is just a normal W3 server with a libwww compiled with this module. + + Anyways, this interface won't change much: + + */ +#ifndef HTWAIS_H +#define HTWAIS_H + +#include <HTAccess.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef GLOBALREF_IS_MACRO + extern GLOBALREF (HTProtocol, HTWAIS); + +#else + GLOBALREF HTProtocol HTWAIS; +#endif /* GLOBALDEF_IS_MACRO */ + +#ifdef __cplusplus +} +#endif +#endif /* HTWAIS_H */ diff --git a/WWW/Library/Implementation/HTWSRC.c b/WWW/Library/Implementation/HTWSRC.c new file mode 100644 index 00000000..cd161009 --- /dev/null +++ b/WWW/Library/Implementation/HTWSRC.c @@ -0,0 +1,487 @@ +/* + * $LynxId: HTWSRC.c,v 1.28 2011/06/11 12:11:53 tom Exp $ + * + * Parse WAIS Source file HTWSRC.c + * ====================== + * + * This module parses a stream with WAIS source file + * format information on it and creates a structured stream. + * That structured stream is then converted into whatever. + * + * 3 June 93 Bug fix: Won't crash if no description + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> + +#include <HTWSRC.h> +#include <LYUtils.h> + +#include <HTML.h> +#include <HTParse.h> + +#include <LYLeaks.h> + +#define BIG 10000 /* Arbitrary limit to value length */ +#define PARAM_MAX BIG +#define CACHE_PERIOD (7*86400) /* Time to keep .src file in seconds */ + +struct _HTStructured { + const HTStructuredClass *isa; + /* ... 
*/ +}; + +#define PUTC(c) (*me->target->isa->put_character)(me->target, c) +#define PUTS(s) (*me->target->isa->put_string)(me->target, s) +#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0, -1, 0) +#define END(e) (*me->target->isa->end_element)(me->target, e, 0) +#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \ + (*me->target->isa->end_element)(me->target, e, 0) + +/* Here are the parameters which can be specified in a source file +*/ +static const char *par_name[] = +{ + "version", + "ip-address", +#define PAR_IP_NAME 2 + "ip-name", +#define PAR_TCP_PORT 3 + "tcp-port", +#define PAR_DATABASE_NAME 4 + "database-name", +#define PAR_COST 5 + "cost", +#define PAR_COST_UNIT 6 + "cost-unit", +#define PAR_FREE 7 + "free", +#define PAR_MAINTAINER 8 + "maintainer", +#define PAR_DESCRIPTION 9 + "description", + "keyword-list", + "source", + "window-geometry", + "configuration", + "script", + "update-time", + "contact-at", + "last-contacted", + "confidence", + "num-docs-to-request", + "font", + "font-size", +#define PAR_UNKNOWN 22 + "unknown", + 0, /* Terminate list */ +#define PAR_COUNT 23 +}; + +enum tokenstate { + beginning, + before_tag, + colon, + before_value, + value, + bracketed_value, + quoted_value, + escape_in_quoted, + done +}; + +/* Stream Object + * ------------ + * + * The target is the structured stream down which the + * parsed results will go. + * + * all the static stuff below should go in here to make it reentrant + */ + +struct _HTStream { + const HTStreamClass *isa; + HTStructured *target; + char *par_value[PAR_COUNT]; + enum tokenstate state; + char param[BIG + 1]; + int param_number; + int param_count; +}; + +/* Decode one hex character +*/ +char from_hex(char c) +{ + return (char) ((c >= '0') && (c <= '9') ? c - '0' + : (c >= 'A') && (c <= 'F') ? c - 'A' + 10 + : (c >= 'a') && (c <= 'f') ? 
c - 'a' + 10 + : 0); +} + +/* State machine + * ------------- + * + * On entry, + * me->state is a valid state (see WSRC_init) + * c is the next character + * On exit, + * returns 1 Done with file + * 0 Continue. me->state is updated if necessary. + * -1 Syntax error error + */ + +/* Treat One Character + * ------------------- + */ +static void WSRCParser_put_character(HTStream *me, int c) +{ + switch (me->state) { + case beginning: + if (c == '(') + me->state = before_tag; + break; + + case before_tag: + if (c == ')') { + me->state = done; + return; /* Done with input file */ + } else if (c == ':') { + me->param_count = 0; + me->state = colon; + } /* Ignore other text */ + break; + + case colon: + if (WHITE(c)) { + me->param[me->param_count++] = 0; /* Terminate */ + for (me->param_number = 0; + par_name[me->param_number]; + me->param_number++) { + if (0 == strcmp(par_name[me->param_number], me->param)) { + break; + } + } + if (!par_name[me->param_number]) { /* Unknown field */ + CTRACE((tfp, "HTWSRC: Unknown field `%s' in source file\n", + me->param)); + me->param_number = PAR_UNKNOWN; + me->state = before_value; /* Could be better ignore */ + return; + } + me->state = before_value; + } else { + if (me->param_count < PARAM_MAX) + me->param[me->param_count++] = (char) c; + } + break; + + case before_value: + if (c == ')') { + me->state = done; + return; /* Done with input file */ + } + if (WHITE(c)) + return; /* Skip white space */ + me->param_count = 0; + if (c == '"') { + me->state = quoted_value; + break; + } + me->state = (c == '"') ? quoted_value : + (c == '(') ? 
bracketed_value : value; + me->param[me->param_count++] = (char) c; /* Don't miss first character */ + break; + + case value: + if (WHITE(c)) { + me->param[me->param_count] = 0; + StrAllocCopy(me->par_value[me->param_number], me->param); + me->state = before_tag; + } else { + if (me->param_count < PARAM_MAX) + me->param[me->param_count++] = (char) c; + } + break; + + case bracketed_value: + if (c == ')') { + me->param[me->param_count] = 0; + StrAllocCopy(me->par_value[me->param_number], me->param); + me->state = before_tag; + break; + } + if (me->param_count < PARAM_MAX) + me->param[me->param_count++] = (char) c; + break; + + case quoted_value: + if (c == '"') { + me->param[me->param_count] = 0; + StrAllocCopy(me->par_value[me->param_number], me->param); + me->state = before_tag; + break; + } + + if (c == '\\') { /* Ignore escape but switch state */ + me->state = escape_in_quoted; + break; + } + /* Fall through! */ + + case escape_in_quoted: + if (me->param_count < PARAM_MAX) + me->param[me->param_count++] = (char) c; + me->state = quoted_value; + break; + + case done: /* Ignore anything after EOF */ + return; + + } /* switch me->state */ +} + +/* Open Cache file + * =============== + * + * Bugs: Maybe for filesystem-challenged platforms (MSDOS for example) we + * should make a hash code for the filename. + */ + +#ifdef CACHE_FILE_PREFIX +static BOOL write_cache(HTStream *me) +{ + FILE *fp; + char *cache_file_name = NULL; + char *www_database; + int result = NO; + + if (!me->par_value[PAR_DATABASE_NAME] + || !me->par_value[PAR_IP_NAME] + ) + return NO; + + www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS); + HTSprintf0(&cache_file_name, "%sWSRC-%s:%s:%.100s.txt", + CACHE_FILE_PREFIX, + me->par_value[PAR_IP_NAME], + (me->par_value[PAR_TCP_PORT] + ? 
me->par_value[PAR_TCP_PORT] + : "210"), + www_database); + + if ((fp = fopen(cache_file_name, TXT_W)) != 0) { + result = YES; + if (me->par_value[PAR_DESCRIPTION]) + fputs(me->par_value[PAR_DESCRIPTION], fp); + else + fputs("Description not available\n", fp); + fclose(fp); + } + FREE(www_database); + FREE(cache_file_name); + return result; +} +#endif + +/* Output equivalent HTML + * ---------------------- + * + */ + +static void give_parameter(HTStream *me, int p) +{ + PUTS(par_name[p]); + if (me->par_value[p]) { + PUTS(": "); + PUTS(me->par_value[p]); + PUTS("; "); + } else { + PUTS(gettext(" NOT GIVEN in source file; ")); + } +} + +/* Generate Outout + * =============== + */ +static void WSRC_gen_html(HTStream *me, int source_file) +{ + if (me->par_value[PAR_DATABASE_NAME]) { + char *shortname = 0; + int l; + + StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]); + l = (int) strlen(shortname); + if (l > 4 && !strcasecomp(shortname + l - 4, ".src")) { + shortname[l - 4] = 0; /* Chop of .src -- boring! */ + } + + START(HTML_HEAD); + PUTC('\n'); + START(HTML_TITLE); + PUTS(shortname); + PUTS(source_file ? gettext(" WAIS source file") : INDEX_SEGMENT); + END(HTML_TITLE); + PUTC('\n'); + END(HTML_HEAD); + + START(HTML_H1); + PUTS(shortname); + PUTS(source_file ? gettext(" description") : INDEX_SEGMENT); + END(HTML_H1); + PUTC('\n'); + FREE(shortname); + } + + START(HTML_DL); /* Definition list of details */ + + if (source_file) { + START(HTML_DT); + PUTS(gettext("Access links")); + MAYBE_END(HTML_DT); + START(HTML_DD); + if (me->par_value[PAR_IP_NAME] && + me->par_value[PAR_DATABASE_NAME]) { + + char *WSRC_address = NULL; + char *www_database; + + www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], + URL_XALPHAS); + HTSprintf0(&WSRC_address, "%s//%s%s%s/%s", + STR_WAIS_URL, + me->par_value[PAR_IP_NAME], + me->par_value[PAR_TCP_PORT] ? ":" : "", + (me->par_value[PAR_TCP_PORT] + ? 
me->par_value[PAR_TCP_PORT] + : ""), + www_database); + + HTStartAnchor(me->target, NULL, WSRC_address); + PUTS(gettext("Direct access")); + END(HTML_A); + /** Proxy will be used if defined, so let user know that - FM **/ + PUTS(gettext(" (or via proxy server, if defined)")); + + FREE(www_database); + FREE(WSRC_address); + + } else { + give_parameter(me, PAR_IP_NAME); + give_parameter(me, PAR_DATABASE_NAME); + } + MAYBE_END(HTML_DD); + + } + /* end if source_file */ + if (me->par_value[PAR_MAINTAINER]) { + START(HTML_DT); + PUTS(gettext("Maintainer")); + MAYBE_END(HTML_DT); + START(HTML_DD); + PUTS(me->par_value[PAR_MAINTAINER]); + MAYBE_END(HTML_DD); + } + if (me->par_value[PAR_IP_NAME]) { + START(HTML_DT); + PUTS(gettext("Host")); + MAYBE_END(HTML_DT); + START(HTML_DD); + PUTS(me->par_value[PAR_IP_NAME]); + MAYBE_END(HTML_DD); + } + + END(HTML_DL); + + if (me->par_value[PAR_DESCRIPTION]) { + START(HTML_PRE); /* Preformatted description */ + PUTS(me->par_value[PAR_DESCRIPTION]); + END(HTML_PRE); + } + + (*me->target->isa->_free) (me->target); + + return; +} /* generate html */ + +static void WSRCParser_put_string(HTStream *context, const char *str) +{ + const char *p; + + for (p = str; *p; p++) + WSRCParser_put_character(context, *p); +} + +static void WSRCParser_write(HTStream *context, const char *str, + int l) +{ + const char *p; + const char *e = str + l; + + for (p = str; p < e; p++) + WSRCParser_put_character(context, *p); +} + +static void WSRCParser_free(HTStream *me) +{ + WSRC_gen_html(me, YES); +#ifdef CACHE_FILE_PREFIX + write_cache(me); +#endif + { + int p; + + for (p = 0; par_name[p]; p++) { /* Clear out old values */ + FREE(me->par_value[p]); + } + } + FREE(me); +} + +static void WSRCParser_abort(HTStream *me, HTError e GCC_UNUSED) +{ + WSRCParser_free(me); +} + +/* Stream subclass -- method routines + * --------------- + */ + +static HTStreamClass WSRCParserClass = +{ + "WSRCParser", + WSRCParser_free, + WSRCParser_abort, + WSRCParser_put_character, 
+ WSRCParser_put_string, + WSRCParser_write +}; + +/* Converter from WAIS Source to whatever + * -------------------------------------- + */ +HTStream *HTWSRCConvert(HTPresentation *pres, HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me = (HTStream *) malloc(sizeof(*me)); + + if (!me) + outofmem(__FILE__, "HTWSRCConvert"); + + assert(me != NULL); + + me->isa = &WSRCParserClass; + me->target = HTML_new(anchor, pres->rep_out, sink); + + { + int p; + + for (p = 0; p < PAR_COUNT; p++) { /* Clear out parameter values */ + me->par_value[p] = 0; + } + } + me->state = beginning; + + return me; +} diff --git a/WWW/Library/Implementation/HTWSRC.h b/WWW/Library/Implementation/HTWSRC.h new file mode 100644 index 00000000..152d27ab --- /dev/null +++ b/WWW/Library/Implementation/HTWSRC.h @@ -0,0 +1,43 @@ +/* A parser for WAIS source files + WAIS SOURCE FILE PARSER + + This converter returns a stream object into which a WAIS source file can be + written. The result is put via a structured stream into whatever format was + required for the output stream. + + See also: HTWAIS protocol interface module + + */ +#ifndef HTWSRC_H +#define HTWSRC_H + +#include <HTFormat.h> + +#ifdef __cplusplus +extern "C" { +#endif + extern char from_hex(char c); + + extern HTStream *HTWSRCConvert(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink); + +/* + +Escaping Strings + + HTDeSlash takes out the invalid characters in a URL path ELEMENT by + converting them into hex-escaped characters. HTEnSlash does the reverse. + + Each returns a pointer to a newly allocated string which must eventually be + freed by the caller. 
+ + */ + extern char *HTDeSlash(const char *str); + + extern char *HTEnSlash(const char *str); + +#ifdef __cplusplus +} +#endif +#endif /* HTWSRC_H */ diff --git a/WWW/Library/Implementation/HText.h b/WWW/Library/Implementation/HText.h new file mode 100644 index 00000000..84ec87c7 --- /dev/null +++ b/WWW/Library/Implementation/HText.h @@ -0,0 +1,219 @@ +/* + * $LynxId: HText.h,v 1.16 2010/09/25 11:41:08 tom Exp $ + * Rich Hypertext object for libWWW + * RICH HYPERTEXT OBJECT + * + * This is the C interface to the Objective-C (or whatever) Style-oriented + * HyperText class. It is used when a style-oriented text object is available + * or created in order to display hypertext. + */ +#ifndef HTEXT_H +#define HTEXT_H + +#include <HTAnchor.h> +#include <HTStyle.h> +#include <HTStream.h> +#include <SGML.h> + +#ifdef __cplusplus +extern "C" { +#endif +#ifndef THINK_C +#ifndef HyperText /* Objective C version defined HyperText */ + typedef struct _HText HText; /* Normal Library */ +#endif +#else + class CHyperText; /* Mac Think-C browser hook */ + typedef CHyperText HText; +#endif + + extern HText *HTMainText; /* Pointer to current main text */ + extern HTParentAnchor *HTMainAnchor; /* Pointer to current text's anchor */ + + extern const char *HTAppName; /* Application name */ + extern const char *HTAppVersion; /* Application version */ + +/* + +Creation and deletion + + HTEXT_NEW: CREATE HYPERTEXT OBJECT + + There are several methods depending on how much you want to specify. The + output stream is used with objects which need to output the hypertext to a + stream. The structure is for objects which need to refer to the structure + which is kept by the creating stream. 
+ + */ + extern HText *HText_new(HTParentAnchor *anchor); + + extern HText *HText_new2(HTParentAnchor *anchor, + HTStream *output_stream); + + extern HText *HText_new3(HTParentAnchor *anchor, + HTStream *output_stream, + HTStructured * structure); + +/* + + FREE HYPERTEXT OBJECT + + */ + extern void HText_free(HText *me); + +/* + +Object Building methods + + These are used by a parser to build the text in an object. HText_beginAppend + must be called, then any combination of other append calls, then + HText_endAppend. This allows optimised handling using buffers and caches + which are flushed at the end. + + */ + extern void HText_beginAppend(HText *text); + + extern void HText_endAppend(HText *text); + +/* + + SET THE STYLE FOR FUTURE TEXT + + */ + + extern void HText_setStyle(HText *text, HTStyle *style); + +/* + + ADD ONE CHARACTER + + */ + extern void HText_appendCharacter(HText *text, int ch); + +/* + + ADD A ZERO-TERMINATED STRING + + */ + + extern void HText_appendText(HText *text, const char *str); + +/* + + NEW PARAGRAPH + + and similar things + + */ + extern void HText_appendParagraph(HText *text); + + extern void HText_appendLineBreak(HText *text); + + extern void HText_appendHorizontalRule(HText *text); + +/* + + START/END SENSITIVE TEXT + + */ + +/* + + The anchor object is created and passed to HText_beginAnchor. The sensitive + text is added to the text object, and then HText_endAnchor is called. + Anchors may not be nested. + + */ + extern int HText_beginAnchor(HText *text, int underline, + HTChildAnchor *anc); + extern void HText_endAnchor(HText *text, int number); + extern BOOL HText_isAnchorBlank(HText *text, int number); + +/* + + APPEND AN INLINE IMAGE + + The image is handled by the creation of an anchor whose destination is the + image document to be included. The semantics is the intended inline display + of the image. 
+ + An alternative implementation could be, for example, to begin an anchor, + append the alternative text or "IMAGE", then end the anchor. This would + simply generate some text linked to the image itself as a separate document. + + */ + extern void HText_appendImage(HText *text, HTChildAnchor *anc, + const char *alternative_text, + int alignment, + int isMap); + +/* + + RETURN THE ANCHOR ASSOCIATED WITH THIS NODE + + */ + extern HTParentAnchor *HText_nodeAnchor(HText *me); + +/* + +Browsing functions + + */ + +/* + + BRING TO FRONT AND HIGHLIGHT IT + + */ + + extern BOOL HText_select(HText *text); + extern BOOL HText_selectAnchor(HText *text, HTChildAnchor *anchor); + +/* + +Editing functions + + These are called from the application. There are many more functions not + included here from the original text object. These functions NEED NOT BE + IMPLEMENTED in a browser which cannot edit. + + */ +/* Style handling: +*/ +/* Apply this style to the selection +*/ + extern void HText_applyStyle(HText *me, HTStyle *style); + +/* Update all text with changed style. +*/ + extern void HText_updateStyle(HText *me, HTStyle *style); + +/* Return style of selection +*/ + extern HTStyle *HText_selectionStyle(HText *me, HTStyleSheet *sheet); + +/* Paste in styled text +*/ + extern void HText_replaceSel(HText *me, const char *aString, + HTStyle *aStyle); + +/* Apply this style to the selection and all similarly formatted text + * (style recovery only) + */ + extern void HTextApplyToSimilar(HText *me, HTStyle *style); + +/* Select the first unstyled run. 
+ * (style recovery only) + */ + extern void HTextSelectUnstyled(HText *me, HTStyleSheet *sheet); + +/* Anchor handling: +*/ + extern void HText_unlinkSelection(HText *me); + extern HTAnchor *HText_referenceSelected(HText *me); + extern HTAnchor *HText_linkSelTo(HText *me, HTAnchor * anchor); + +#ifdef __cplusplus +} +#endif +#endif /* HTEXT_H */ diff --git a/WWW/Library/Implementation/HTioctl.h b/WWW/Library/Implementation/HTioctl.h new file mode 100644 index 00000000..99f86321 --- /dev/null +++ b/WWW/Library/Implementation/HTioctl.h @@ -0,0 +1,11 @@ +/* + * A routine to mimic the ioctl function for UCX. + * Bjorn S. Nilsson, 25-Nov-1993. Based on an example in the UCX manual. + */ +#include <iodef.h> +#define IOC_OUT (int)0x40000000 +extern int vaxc$get_sdc(), sys$qiow(); + +#ifndef UCX$C_IOCTL +#define UCX$C_IOCTL TCPIP$C_IOCTL +#endif diff --git a/WWW/Library/Implementation/LYLeaks.h b/WWW/Library/Implementation/LYLeaks.h new file mode 100644 index 00000000..b3672734 --- /dev/null +++ b/WWW/Library/Implementation/LYLeaks.h @@ -0,0 +1,299 @@ +/* + * $LynxId: LYLeaks.h,v 1.14 2012/02/10 00:15:56 tom Exp $ + */ +#ifndef __LYLEAKS_H +/* + * Avoid include redundancy + * Include only if finding memory leaks. + */ +#define __LYLEAKS_H + +/* + * Copyright (c) 1994, University of Kansas, All Rights Reserved + * + * Include File: LYLeaks.h + * Purpose: Header to convert requests for allocation to Lynx + * custom functions to track memory leaks. + * Remarks/Portability/Dependencies/Restrictions: + * For the stdlib.h allocation functions to be overridden by the + * Lynx memory tracking functions all modules allocating, + * freeing, or resizing memory must have LY_FIND_LEAKS + * defined before including this file. + * This header file should be included in every source file which + * does any memory manipulation through use of the + * stdlib.h memory functions. 
+ * For proper reporting of memory leaks, the function LYLeaks + * should be registered for execution by atexit as the + * very first executable statement in main. + * This code is slow and should not be used except in debugging + * circumstances (don't define LY_FIND_LEAKS). + * If you are using LY_FIND_LEAKS and don't want the LYLeak* + * memory functions to be used in a certain file, + * define NO_MEMORY_TRACKING before including this file. + * The only safe way to call the LYLeak* functions is to use + * the below macros because they depend on the static + * string created by __FILE__ to not be dynamic in + * nature (don't free it and assume will exist at all + * times during execution). + * If you are using LY_FIND_LEAKS and LY_FIND_LEAKS_EXTENDED and + * want only normal memory tracking (not extended for + * HTSprintf/HTSprintf0) to be used in a certain file, + * define NO_EXTENDED_MEMORY_TRACKING and don't define + * NO_MEMORY_TRACKING before including this file. + * Revision History: + * 05-26-94 created for Lynx 2-3-1, Garrett Arch Blythe + * 10-30-97 modified to handle StrAllocCopy() and + * StrAllocCat(). - KW & FM + * 1999-10-17 modified to handle HTSprintf0 and HTSprintf(), + * and to provide mark_malloced, if + * LY_FIND_LEAKS_EXTENDED is defined. - kw + * 2003-01-22 add sequence-id for counting mallocs/frees -TD + * 2004-04-27 ANSIfy'd -TD + * 2012-02-09 add bstring interfaces -TD + */ + +/* Undefine this to get no improved HTSprintf0/HTSprintf tracking: */ +#define LY_FIND_LEAKS_EXTENDED + +/* + * Required includes + */ + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Constant defines + */ +#define MAX_CONTENT_LENGTH 50 +#ifdef VMS +#define LEAKAGE_SINK "sys$login:Lynx.leaks" +#else +#define LEAKAGE_SINK "Lynx.leaks" +#endif /* VMS */ +/* + * Data structures + */ typedef struct SourceLocation_tag { + /* + * The file name and line number of where an event took place. 
+ */ + const char *cp_FileName; + short ssi_LineNumber; + } SourceLocation; + + typedef struct AllocationList_tag { + /* + * A singly linked list. + */ + struct AllocationList_tag *ALp_Next; + + /* + * Count the number of mallocs. + */ + long st_Sequence; + + /* + * The memory pointer allocated. If set to NULL, then an invalid request + * was made. The invalid pointer also. + */ + void *vp_Alloced; + void *vp_BadRequest; + + /* + * The size in bytes of the allocated memory. + */ + size_t st_Bytes; + + /* + * The source location of specific event (calloc, malloc, free). realloc + * kept separate since will track last realloc on pointer. + */ + SourceLocation SL_memory; + SourceLocation SL_realloc; + } AllocationList; + +/* + * Global variable declarations + */ + +/* + * Macros + */ +#if defined(LY_FIND_LEAKS) && !defined(NO_MEMORY_TRACKING) +/* + * Only use these macros if we are to track memory allocations. The reason for + * using a macro instead of a define is that we want to track where the initial + * allocation took place or where the last reallocation took place. Track + * where the allocation took place by the __FILE__ and __LINE__ defines which + * are automatic to the compiler. + */ +#ifdef malloc +#undef malloc +#endif /* malloc */ +#define malloc(st_bytes) LYLeakMalloc(st_bytes, __FILE__, __LINE__) + +#ifdef calloc +#undef calloc +#endif /* calloc */ +#define calloc(st_number, st_bytes) LYLeakCalloc(st_number, st_bytes, \ + __FILE__, __LINE__) + +#ifdef realloc +#undef realloc +#endif /* realloc */ +#define realloc(vp_alloced, st_newbytes) LYLeakRealloc(vp_alloced, \ + st_newbytes, __FILE__, __LINE__) + +#ifdef free +#undef free +#endif /* free */ +#define free(vp_alloced) LYLeakFree(vp_alloced, __FILE__, __LINE__) + +/* + * Added the following two defines to track Lynx's frequent use of those + * macros. 
- KW 1997-10-12 + */ +#ifdef StrAllocCopy +#undef StrAllocCopy +#endif /* StrAllocCopy */ +#define StrAllocCopy(dest, src) LYLeakSACopy(&(dest), src, __FILE__, __LINE__) + +#ifdef StrAllocCat +#undef StrAllocCat +#endif /* StrAllocCat */ +#define StrAllocCat(dest, src) LYLeakSACat(&(dest), src, __FILE__, __LINE__) + +#ifdef BStrAlloc +#undef BStrAlloc +#endif +#define BStrAlloc(d,n) LYLeakSABAlloc( &(d), n, __FILE__, __LINE__) + +#ifdef BStrCopy +#undef BStrCopy +#endif +#define BStrCopy(d,s) LYLeakSABCopy( &(d), BStrData(s), BStrLen(s), __FILE__, __LINE__) + +#ifdef BStrCopy0 +#undef BStrCopy0 +#endif +#define BStrCopy0(d,s) LYLeakSABCopy0( &(d), s, __FILE__, __LINE__) + +#ifdef BStrCat +#undef BStrCat +#endif +#define BStrCat(d,s) LYLeakSABCat( &(d), BStrData(s), BStrLen(s), __FILE__, __LINE__) + +#ifdef BStrCat0 +#undef BStrCat0 +#endif +#define BStrCat0(d,s) LYLeakSABCat0( &(d), s, __FILE__, __LINE__) + +#define mark_malloced(a,size) LYLeak_mark_malloced(a,size, __FILE__, __LINE__) + +#if defined(LY_FIND_LEAKS_EXTENDED) && !defined(NO_EXTENDED_MEMORY_TRACKING) + +#ifdef HTSprintf0 +#undef HTSprintf0 +#endif /* HTSprintf0 */ +#define HTSprintf0 (Get_htsprintf0_fn(__FILE__,__LINE__)) + +#ifdef HTSprintf +#undef HTSprintf +#endif /* HTSprintf */ +#define HTSprintf (Get_htsprintf_fn(__FILE__,__LINE__)) + +#endif /* LY_FIND_LEAKS_EXTENDED and not NO_EXTENDED_MEMORY_TRACKING */ + +#else /* LY_FIND_LEAKS && !NO_MEMORY_TRACKING */ + +#define mark_malloced(a,size) /* no-op */ +#define LYLeakSequence() (-1) + +#endif /* LY_FIND_LEAKS && !NO_MEMORY_TRACKING */ + +#if defined(LY_FIND_LEAKS) +#define PUBLIC_IF_FIND_LEAKS /* nothing */ +#else +#define PUBLIC_IF_FIND_LEAKS static +#endif + +/* + * Function declarations. + * See the appropriate source file for usage. 
+ */ +#ifndef LYLeakSequence + extern long LYLeakSequence(void); +#endif + extern void LYLeaks(void); + +#ifdef LY_FIND_LEAKS_EXTENDED + extern AllocationList *LYLeak_mark_malloced(void *vp_alloced, + size_t st_bytes, + const char *cp_File, + const short ssi_Line); +#endif /* LY_FIND_LEAKS_EXTENDED */ + extern void *LYLeakMalloc(size_t st_bytes, const char *cp_File, + const short ssi_Line); + extern void *LYLeakCalloc(size_t st_number, size_t st_bytes, const char *cp_File, + const short ssi_Line); + extern void *LYLeakRealloc(void *vp_alloced, + size_t st_newbytes, + const char *cp_File, + const short ssi_Line); + extern void LYLeakFree(void *vp_alloced, + const char *cp_File, + const short ssi_Line); + extern char *LYLeakSACopy(char **dest, + const char *src, + const char *cp_File, + const short ssi_Line); + extern char *LYLeakSACat(char **dest, + const char *src, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABAlloc(bstring **dest, + int len, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABCopy(bstring **dest, + const char *src, + int len, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABCopy0(bstring **dest, + const char *src, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABCat(bstring **dest, + const char *src, + int len, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABCat0(bstring **dest, + const char *src, + const char *cp_File, + const short ssi_Line); + extern void LYLeakSABFree(bstring **ptr, + const char *cp_File, + const short ssi_Line); + +#ifdef LY_FIND_LEAKS_EXTENDED +/* + * Trick to get tracking of var arg functions without relying on var arg + * preprocessor macros: + */ + typedef char *HTSprintflike(char **, const char *,...); + extern HTSprintflike *Get_htsprintf_fn(const char *cp_File, + const short ssi_Line); + extern HTSprintflike *Get_htsprintf0_fn(const char *cp_File, + const short ssi_Line); +#endif /* LY_FIND_LEAKS_EXTENDED */ + 
+#ifdef __cplusplus +} +#endif +#endif /* __LYLEAKS_H */ diff --git a/WWW/Library/Implementation/LYexit.h b/WWW/Library/Implementation/LYexit.h new file mode 100644 index 00000000..d32e2079 --- /dev/null +++ b/WWW/Library/Implementation/LYexit.h @@ -0,0 +1,67 @@ +#ifndef __LYEXIT_H +/* + * Avoid include redundancy + */ +#define __LYEXIT_H + +/* + * Copyright (c) 1994, University of Kansas, All Rights Reserved + * + * Include File: LYexit.h + * Purpose: Provide an atexit function for libraries without such. + * Remarks/Portability/Dependencies/Restrictions: + * Include this header in every file that you have an exit or + * atexit statement. + * Revision History: + * 06-15-94 created Lynx 2-3-1 Garrett Arch Blythe + */ + +/* + * Required includes + */ +#ifdef _WINDOWS +#include <process.h> /* declares exit() */ +#endif + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif +/* + * Constant defines + */ +#ifdef exit +#undef exit +#endif +#define exit(code) LYexit(code) +#define atexit LYatexit +#define ATEXITSIZE 50 + +/* + * Data structures + */ + +/* + * Global variable declarations + */ + +/* + * Macros + */ + +/* + * Function declarations + */ + extern void outofmem(const char *fname, const char *func); + extern void reset_signals(void); + extern void exit_immediately(int status) GCC_NORETURN; + extern void LYexit(int status) GCC_NORETURN; + extern int LYatexit(void (*function) (void)); + +#ifdef __cplusplus +} +#endif +#endif /* __LYEXIT_H */ diff --git a/WWW/Library/Implementation/SGML.c b/WWW/Library/Implementation/SGML.c new file mode 100644 index 00000000..86908a01 --- /dev/null +++ b/WWW/Library/Implementation/SGML.c @@ -0,0 +1,4978 @@ +/* + * $LynxId: SGML.c,v 1.148 2012/02/10 18:32:26 tom Exp $ + * + * General SGML Parser code SGML.c + * ======================== + * + * This module implements an HTStream object. To parse an + * SGML file, create this object which is a parser. 
The object + * is (currently) created by being passed a DTD structure, + * and a target HTStructured object at which to throw the parsed stuff. + * + * 6 Feb 93 Binary searches used. Interface modified. + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> + +#include <SGML.h> +#include <HTMLDTD.h> +#include <HTAccess.h> +#include <HTCJK.h> /* FIXME: this doesn't belong in SGML.c */ +#include <UCMap.h> +#include <UCDefs.h> +#include <UCAux.h> + +#include <HTChunk.h> +#include <HTUtils.h> + +#include <LYCharSets.h> +#include <LYCharVals.h> /* S/390 -- gil -- 0635 */ +#include <LYGlobalDefs.h> +#include <LYStrings.h> +#include <LYLeaks.h> +#include <LYUtils.h> + +#ifdef USE_COLOR_STYLE +# include <LYStyle.h> +#endif +#ifdef USE_PRETTYSRC +# include <LYPrettySrc.h> +#endif + +#define AssumeCP1252(context) \ + (((context)->inUCLYhndl == LATIN1 \ + || (context)->inUCLYhndl == US_ASCII) \ + && html5_charsets) + +#define INVALID (-1) + +static int sgml_offset; + +#ifdef USE_PRETTYSRC + +static char *entity_string; /* this is used for printing entity name. + + Unconditionally added since redundant assigments don't hurt much */ + +static void fake_put_character(void *p GCC_UNUSED, + char c GCC_UNUSED) +{ +} + +#define START TRUE +#define STOP FALSE + +#define PUTS_TR(x) psrc_convert_string = TRUE; PUTS(x) + +#endif + +/* my_casecomp() - optimized by the first character, NOT_ASCII ok */ +#define my_casecomp(a,b) ((TOUPPER(*a) == TOUPPER(*b)) ? \ + AS_casecomp(a,b) : \ + (TOASCII(TOUPPER(*a)) - TOASCII(TOUPPER(*b)))) + + /* will use partially inlined version */ +#define orig_HTChunkPutUtf8Char HTChunkPutUtf8Char +#undef HTChunkPutUtf8Char + +/* ...used for comments and attributes value like href... 
*/ +#define HTChunkPutUtf8Char(ch,x) \ + { \ + if ((TOASCII(x) < 128) && (ch->size < ch->allocated)) \ + ch->data[ch->size++] = (char)x; \ + else \ + orig_HTChunkPutUtf8Char(ch,x); \ + } + +#define PUTS(str) ((*context->actions->put_string)(context->target, str)) +#define PUTC(ch) ((*context->actions->put_character)(context->target, (char) ch)) +#define PUTUTF8(code) (UCPutUtf8_charstring((HTStream *)context->target, \ + (putc_func_t*)(context->actions->put_character), code)) + +#ifdef USE_PRETTYSRC +#define PRETTYSRC_PUTC(c) if (psrc_view) PUTC(c) +#else +#define PRETTYSRC_PUTC(c) /* nothing */ +#endif + +/*the following macros are used for pretty source view. */ +#define IS_C(attr) (attr.type == HTMLA_CLASS) + +HTCJKlang HTCJK = NOCJK; /* CJK enum value. */ +BOOL HTPassEightBitRaw = FALSE; /* Pass 161-172,174-255 raw. */ +BOOL HTPassEightBitNum = FALSE; /* Pass ^ numeric entities raw. */ +BOOL HTPassHighCtrlRaw = FALSE; /* Pass 127-160,173, raw. */ +BOOL HTPassHighCtrlNum = FALSE; /* Pass €-Ÿ raw. */ + +/* The State (context) of the parser + * + * This is passed with each call to make the parser reentrant + * + */ + +#define MAX_ATTRIBUTES 36 /* Max number of attributes per element */ + +/* Element Stack + * ------------- + * This allows us to return down the stack reselecting styles. + * As we return, attribute values will be garbage in general. 
+ */ +typedef struct _HTElement HTElement; +struct _HTElement { + HTElement *next; /* Previously nested element or 0 */ + HTTag *tag; /* The tag at this level */ +}; + +typedef enum { + S_text = 0 + ,S_attr + ,S_attr_gap + ,S_comment + ,S_cro + ,S_doctype + ,S_dollar + ,S_dollar_dq + ,S_dollar_paren + ,S_dollar_paren_dq + ,S_dollar_paren_sq + ,S_dollar_sq + ,S_dquoted + ,S_end + ,S_entity + ,S_equals + ,S_ero + ,S_esc + ,S_esc_dq + ,S_esc_sq + ,S_exclamation + ,S_in_kanji + ,S_incro + ,S_junk_tag + ,S_litteral + ,S_marked + ,S_nonascii_text + ,S_nonascii_text_dq + ,S_nonascii_text_sq + ,S_paren + ,S_paren_dq + ,S_paren_sq + ,S_pcdata + ,S_pi + ,S_script + ,S_sgmlatt + ,S_sgmlele + ,S_sgmlent + ,S_squoted + ,S_tag + ,S_tag_gap + ,S_tagname_slash + ,S_value +} sgml_state; + +/* Internal Context Data Structure + * ------------------------------- + */ +struct _HTStream { + + const HTStreamClass *isa; /* inherited from HTStream */ + + const SGML_dtd *dtd; + const HTStructuredClass *actions; /* target class */ + HTStructured *target; /* target object */ + + HTTag *current_tag; + HTTag *slashedtag; + const HTTag *unknown_tag; + BOOL extended_html; /* xhtml */ + BOOL strict_xml; /* xml */ + BOOL inSELECT; + BOOL no_lynx_specialcodes; + int current_attribute_number; + HTChunk *string; + int leading_spaces; + int trailing_spaces; + HTElement *element_stack; + sgml_state state; + unsigned char kanji_buf; +#ifdef CALLERDATA + void *callerData; +#endif /* CALLERDATA */ + BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? 
*/ + char *value[MAX_ATTRIBUTES]; /* NULL, or strings alloc'd with StrAllocCopy_extra() */ + + BOOL lead_exclamation; + BOOL first_dash; + BOOL end_comment; + BOOL doctype_bracket; + BOOL first_bracket; + BOOL second_bracket; + BOOL isHex; + + HTParentAnchor *node_anchor; + LYUCcharset *inUCI; /* pointer to anchor UCInfo */ + int inUCLYhndl; /* charset we are fed */ + LYUCcharset *outUCI; /* anchor UCInfo for target */ + int outUCLYhndl; /* charset for target */ + char utf_count; + UCode_t utf_char; + char utf_buf[8]; + char *utf_buf_p; + UCTransParams T; + int current_tag_charset; /* charset to pass attributes */ + + char *recover; + int recover_index; + char *include; + char *active_include; + int include_index; + char *url; + char *csi; + int csi_index; +#ifdef USE_PRETTYSRC + BOOL cur_attr_is_href; + BOOL cur_attr_is_name; +#endif +}; + +#ifdef NO_LYNX_TRACE +#define state_name(n) "state" +#else +static const char *state_name(sgml_state n) +{ + const char *result = "?"; + /* *INDENT-OFF* */ + switch (n) { + case S_attr: result = "S_attr"; break; + case S_attr_gap: result = "S_attr_gap"; break; + case S_comment: result = "S_comment"; break; + case S_cro: result = "S_cro"; break; + case S_doctype: result = "S_doctype"; break; + case S_dollar: result = "S_dollar"; break; + case S_dollar_dq: result = "S_dollar_dq"; break; + case S_dollar_paren: result = "S_dollar_paren"; break; + case S_dollar_paren_dq: result = "S_dollar_paren_dq"; break; + case S_dollar_paren_sq: result = "S_dollar_paren_sq"; break; + case S_dollar_sq: result = "S_dollar_sq"; break; + case S_dquoted: result = "S_dquoted"; break; + case S_end: result = "S_end"; break; + case S_entity: result = "S_entity"; break; + case S_equals: result = "S_equals"; break; + case S_ero: result = "S_ero"; break; + case S_esc: result = "S_esc"; break; + case S_esc_dq: result = "S_esc_dq"; break; + case S_esc_sq: result = "S_esc_sq"; break; + case S_exclamation: result = "S_exclamation"; break; + case S_in_kanji: 
result = "S_in_kanji"; break; + case S_incro: result = "S_incro"; break; + case S_pi: result = "S_pi"; break; + case S_junk_tag: result = "S_junk_tag"; break; + case S_litteral: result = "S_litteral"; break; + case S_marked: result = "S_marked"; break; + case S_nonascii_text: result = "S_nonascii_text"; break; + case S_nonascii_text_dq: result = "S_nonascii_text_dq"; break; + case S_nonascii_text_sq: result = "S_nonascii_text_sq"; break; + case S_paren: result = "S_paren"; break; + case S_paren_dq: result = "S_paren_dq"; break; + case S_paren_sq: result = "S_paren_sq"; break; + case S_pcdata: result = "S_pcdata"; break; + case S_script: result = "S_script"; break; + case S_sgmlatt: result = "S_sgmlatt"; break; + case S_sgmlele: result = "S_sgmlele"; break; + case S_sgmlent: result = "S_sgmlent"; break; + case S_squoted: result = "S_squoted"; break; + case S_tag: result = "S_tag"; break; + case S_tag_gap: result = "S_tag_gap"; break; + case S_tagname_slash: result = "S_tagname_slash"; break; + case S_text: result = "S_text"; break; + case S_value: result = "S_value"; break; + } + /* *INDENT-ON* */ + + return result; +} +#endif + +/* storage for Element Stack */ +#define DEPTH 10 +static HTElement pool[DEPTH]; +static int depth = 0; + +static HTElement *pool_alloc(void) +{ + depth++; + if (depth > DEPTH) + return (HTElement *) malloc(sizeof(HTElement)); + return (pool + depth - 1); +} + +static void pool_free(HTElement * e) +{ + if (depth > DEPTH) + FREE(e); + depth--; + return; +} + +#ifdef USE_PRETTYSRC + +static void HTMLSRC_apply_markup(HTStream *context, + HTlexeme lexeme, + int start) +{ + HT_tagspec *ts = *((start ? lexeme_start : lexeme_end) + lexeme); + + while (ts) { +#ifdef USE_COLOR_STYLE + if (ts->start) { + current_tag_style = ts->style; + force_current_tag_style = TRUE; + forced_classname = ts->class_name; + force_classname = TRUE; + } +#endif + CTRACE((tfp, ts->start ? 
"SRCSTART %d\n" : "SRCSTOP %d\n", (int) lexeme)); + if (ts->start) + (*context->actions->start_element) (context->target, + (int) ts->element, + ts->present, + (STRING2PTR) ts->value, + context->current_tag_charset, + &context->include); + else + (*context->actions->end_element) (context->target, + (int) ts->element, + &context->include); + ts = ts->next; + } +} + +#define PSRCSTART(x) HTMLSRC_apply_markup(context,HTL_##x,START) +#define PSRCSTOP(x) HTMLSRC_apply_markup(context,HTL_##x,STOP) + +#define attr_is_href context->cur_attr_is_href +#define attr_is_name context->cur_attr_is_name +#endif + +static void set_chartrans_handling(HTStream *context, + HTParentAnchor *anchor, + int chndl) +{ + if (chndl < 0) { + /* + * Nothing was set for the parser in earlier stages, so the HTML + * parser's UCLYhndl should still be its default. - FM + */ + chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_STRUCTURED); + if (chndl < 0) + /* + * That wasn't set either, so seek the HText default. - FM + */ + chndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT); + if (chndl < 0) + /* + * That wasn't set either, so assume the current display character + * set. - FM + */ + chndl = current_char_set; + /* + * Try to set the HText and HTML stages' chartrans info with the + * default lock level (will not be changed if it was set previously + * with a higher lock level). - FM + */ + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + HTAnchor_setUCInfoStage(anchor, chndl, + UCT_STAGE_STRUCTURED, + UCT_SETBY_DEFAULT); + /* + * Get the chartrans info for output to the HTML parser. - FM + */ + context->outUCI = HTAnchor_getUCInfoStage(anchor, + UCT_STAGE_STRUCTURED); + context->outUCLYhndl = HTAnchor_getUCLYhndl(context->node_anchor, + UCT_STAGE_STRUCTURED); + } + /* + * Set the in->out transformation parameters. 
- FM + */ + UCSetTransParams(&context->T, + context->inUCLYhndl, context->inUCI, + context->outUCLYhndl, context->outUCI); + /* + * This is intended for passing the SGML parser's input charset as an + * argument in each call to the HTML parser's start tag function, but it + * would be better to call a Lynx_HTML_parser function to set an element in + * its HTStructured object, itself, if this were needed. - FM + */ +#ifndef EXP_JAPANESEUTF8_SUPPORT + if (IS_CJK_TTY) { + context->current_tag_charset = -1; + } else +#endif + if (context->T.transp) { + context->current_tag_charset = context->inUCLYhndl; + } else if (context->T.decode_utf8) { + context->current_tag_charset = context->inUCLYhndl; + } else if (context->T.do_8bitraw || + context->T.use_raw_char_in) { + context->current_tag_charset = context->inUCLYhndl; + } else if (context->T.output_utf8 || + context->T.trans_from_uni) { + context->current_tag_charset = UCGetLYhndl_byMIME("utf-8"); + } else { + context->current_tag_charset = LATIN1; + } +} + +static void change_chartrans_handling(HTStream *context) +{ + int new_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, + UCT_STAGE_PARSER); + + if (new_LYhndl != context->inUCLYhndl && + new_LYhndl >= 0) { + /* + * Something changed. but ignore if a META wants an unknown charset. 
+ */ + LYUCcharset *new_UCI = HTAnchor_getUCInfoStage(context->node_anchor, + UCT_STAGE_PARSER); + + if (new_UCI) { + LYUCcharset *next_UCI = HTAnchor_getUCInfoStage(context->node_anchor, + UCT_STAGE_STRUCTURED); + int next_LYhndl = HTAnchor_getUCLYhndl(context->node_anchor, UCT_STAGE_STRUCTURED); + + context->inUCI = new_UCI; + context->inUCLYhndl = new_LYhndl; + context->outUCI = next_UCI; + context->outUCLYhndl = next_LYhndl; + set_chartrans_handling(context, + context->node_anchor, next_LYhndl); + } + } +} + +#ifdef USE_COLOR_STYLE +#include <AttrList.h> +static int current_is_class = 0; +#endif + +/* Handle Attribute + * ---------------- + */ +/* PUBLIC const char * SGML_default = ""; ?? */ + +static void handle_attribute_name(HTStream *context, const char *s) +{ + HTTag *tag = context->current_tag; + const attr *attributes = tag->attributes; + int high, low, i, diff; + +#ifdef USE_PRETTYSRC + if (psrc_view) { + attr_is_href = FALSE; + attr_is_name = FALSE; + } +#endif + /* + * Ignore unknown tag. - KW + */ + if (tag == context->unknown_tag) { +#ifdef USE_PRETTYSRC + if (psrc_view) + context->current_attribute_number = 1; /* anything !=INVALID */ +#endif + return; + } + + /* + * Binary search for attribute name. + */ + for (low = 0, high = tag->number_of_attributes; + high > low; + diff < 0 ? 
(low = i + 1) : (high = i)) { + i = (low + (high - low) / 2); + diff = my_casecomp(attributes[i].name, s); + if (diff == 0) { /* success: found it */ + context->current_attribute_number = i; +#ifdef USE_PRETTYSRC + if (psrc_view) { + attr_is_name = (BOOL) (attributes[i].type == HTMLA_ANAME); + attr_is_href = (BOOL) (attributes[i].type == HTMLA_HREF); + } else +#endif + { + context->present[i] = YES; + Clear_extra(context->value[i]); +#ifdef USE_COLOR_STYLE +# ifdef USE_PRETTYSRC + current_is_class = IS_C(attributes[i]); +# else + current_is_class = (!strcasecomp("class", s)); +# endif + CTRACE((tfp, "SGML: found attribute %s, %d\n", s, current_is_class)); +#endif + } + return; + } + /* if */ + } /* for */ + + CTRACE((tfp, "SGML: Unknown attribute %s for tag %s\n", + s, NonNull(context->current_tag->name))); + context->current_attribute_number = INVALID; /* Invalid */ +} + +/* Handle attribute value + * ---------------------- + */ +static void handle_attribute_value(HTStream *context, const char *s) +{ + if (context->current_attribute_number != INVALID) { + StrAllocCopy_extra(context->value[context->current_attribute_number], s); +#ifdef USE_COLOR_STYLE + if (current_is_class) { + StrNCpy(class_string, s, TEMPSTRINGSIZE); + CTRACE((tfp, "SGML: class is '%s'\n", s)); + } else { + CTRACE((tfp, "SGML: attribute value is '%s'\n", s)); + } +#endif + } else { + CTRACE((tfp, "SGML: Attribute value %s ***ignored\n", s)); + } + context->current_attribute_number = INVALID; /* can't have two assignments! */ +} + +/* + * Translate some Unicodes to Lynx special codes and output them. + * Special codes - ones those output depend on parsing. + * + * Additional issue, like handling bidirectional text if necessary + * may be called from here: zwnj (8204), zwj (8205), lrm (8206), rlm (8207) + * - currently they are ignored in SGML.c and LYCharUtils.c + * but also in UCdomap.c because they are non printable... 
+ * + */ +static BOOL put_special_unicodes(HTStream *context, UCode_t code) +{ + /* (Tgf_nolyspcl) */ + if (context->no_lynx_specialcodes) { + /* + * We were asked by a "DTD" flag to not generate lynx specials. - kw + */ + return NO; + } + + if (code == CH_NBSP) { /* S/390 -- gil -- 0657 */ + /* + * Use Lynx special character for nbsp. + */ +#ifdef USE_PRETTYSRC + if (!psrc_view) +#endif + PUTC(HT_NON_BREAK_SPACE); + } else if (code == CH_SHY) { + /* + * Use Lynx special character for shy. + */ +#ifdef USE_PRETTYSRC + if (!psrc_view) +#endif + PUTC(LY_SOFT_HYPHEN); + } else if (code == 8194 || code == 8201) { + /* + * Use Lynx special character for ensp or thinsp. + * + * Originally, Lynx use space '32' as word delimiter and omits this + * space at end of line if word is wrapped to the next line. There are + * several other spaces in the Unicode repertoire and we should teach + * Lynx to understand them, not only as regular characters but in the + * context of line wrapping. Unfortunately, if we use HT_EN_SPACE we + * override the chartrans tables for those spaces with a single '32' + * for all (but do line wrapping more fancy). + * + * We may treat emsp as one or two ensp (below). + */ +#ifdef USE_PRETTYSRC + if (!psrc_view) +#endif + PUTC(HT_EN_SPACE); + } else if (code == 8195) { + /* + * Use Lynx special character for emsp. + */ +#ifdef USE_PRETTYSRC + if (!psrc_view) { +#endif + /* PUTC(HT_EN_SPACE); let's stay with a single space :) */ + PUTC(HT_EN_SPACE); +#ifdef USE_PRETTYSRC + } +#endif + } else { + /* + * Return NO if nothing done. + */ + return NO; + } + /* + * We have handled it. + */ + return YES; +} + +#ifdef USE_PRETTYSRC +static void put_pretty_entity(HTStream *context, int term) +{ + PSRCSTART(entity); + PUTC('&'); + PUTS(entity_string); + if (term) + PUTC((char) term); + PSRCSTOP(entity); +} + +static void put_pretty_number(HTStream *context) +{ + PSRCSTART(entity); + PUTS((context->isHex ? 
"&#x" : "&#")); + PUTS(entity_string); + PUTC(';'); + PSRCSTOP(entity); +} +#endif /* USE_PRETTYSRC */ + +/* Handle entity + * ------------- + * + * On entry, + * s contains the entity name zero terminated + * Bugs: + * If the entity name is unknown, the terminator is treated as + * a printable non-special character in all cases, even if it is '<' + * Bug-fix: + * Modified SGML_character() so we only come here with terminator + * as '\0' and check a FoundEntity flag. -- Foteos Macrides + * + * Modified more (for use with Lynx character translation code): + */ +static char replace_buf[64]; /* buffer for replacement strings */ +static BOOL FoundEntity = FALSE; + +static void handle_entity(HTStream *context, int term) +{ + UCode_t code; + long uck = -1; + const char *s = context->string->data; + + /* + * Handle all entities normally. - FM + */ + FoundEntity = FALSE; + if ((code = HTMLGetEntityUCValue(s)) != 0) { + /* + * We got a Unicode value for the entity name. Check for special + * Unicodes. - FM + */ + if (put_special_unicodes(context, code)) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } +#endif + FoundEntity = TRUE; + return; + } + /* + * Seek a translation from the chartrans tables. + */ + if ((uck = UCTransUniChar(code, context->outUCLYhndl)) >= 32 && +/* =============== work in ASCII below here =============== S/390 -- gil -- 0672 */ + uck < 256 && + (uck < 127 || + uck >= LYlowest_eightbit[context->outUCLYhndl])) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } else +#endif + PUTC(FROMASCII((char) uck)); + FoundEntity = TRUE; + return; + } else if ((uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + * Not found; look for replacement string. 
+ */ + (uck = UCTransUniCharStr(replace_buf, 60, code, + context->outUCLYhndl, 0) >= 0)) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } else +#endif + PUTS(replace_buf); + FoundEntity = TRUE; + return; + } + /* + * If we're displaying UTF-8, try that now. - FM + */ +#ifndef USE_PRETTYSRC + if (context->T.output_utf8 && PUTUTF8(code)) { + FoundEntity = TRUE; + return; + } +#else + if (context->T.output_utf8 && (psrc_view + ? (UCPutUtf8_charstring((HTStream *) context->target, + (putc_func_t *) (fake_put_character), + code)) + : PUTUTF8(code))) { + + if (psrc_view) { + put_pretty_entity(context, term); + } + + FoundEntity = TRUE; + return; + } +#endif + /* + * If it's safe ASCII, use it. - FM + */ + if (code >= 32 && code < 127) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } else +#endif + + PUTC(FROMASCII((char) code)); + FoundEntity = TRUE; + return; + } +/* =============== work in ASCII above here =============== S/390 -- gil -- 0682 */ + /* + * Ignore zwnj (8204) and zwj (8205), if we get to here. Note that + * zwnj may have been handled as <WBR> by the calling function. - FM + */ + if (!strcmp(s, "zwnj") || + !strcmp(s, "zwj")) { + CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } +#endif + FoundEntity = TRUE; + return; + } + /* + * Ignore lrm (8206), and rln (8207), if we get to here. - FM + */ + if (!strcmp(s, "lrm") || + !strcmp(s, "rlm")) { + CTRACE((tfp, "handle_entity: Ignoring '%s'.\n", s)); +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_entity(context, term); + } +#endif + FoundEntity = TRUE; + return; + } + } + + /* + * If entity string not found, display as text. 
+ */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + CTRACE((tfp, "SGML: Unknown entity '%s' %" PRI_UCode_t " %ld\n", s, code, uck)); /* S/390 -- gil -- 0695 */ + PUTC('&'); + PUTS(s); + if (term != '\0') + PUTC(term); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif +} + +/* Handle comment + * -------------- + */ +static void handle_comment(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Comment:\n<%s>\n", s)); + + if (context->csi == NULL && + StrNCmp(s, "!--#", 4) == 0 && + LYCheckForCSI(context->node_anchor, &context->url) == TRUE) { + LYDoCSI(context->url, s, &context->csi); + } else { + LYCommentHacks(context->node_anchor, context->string->data); + } + + return; +} + +/* Handle identifier + * ----------------- + */ +static void handle_identifier(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Identifier:\n<%s>\n", s)); + + return; +} + +/* Handle doctype + * -------------- + */ +static void handle_doctype(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Doctype:\n<%s>\n", s)); + if (strstr(s, "DTD XHTML ") != 0) { + CTRACE((tfp, "...processing extended HTML\n")); + context->extended_html = TRUE; + } + + return; +} + +/* Handle marked + * ------------- + */ +static void handle_marked(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Marked Section:\n<%s>\n", s)); + + if (!StrNCmp(context->string->data, "![INCLUDE[", 10)) { + context->string->data[context->string->size - 3] = '\0'; + StrAllocCat(context->include, context->string->data + 10); + /* @@@ This needs to take charset into account! 
@@@ + the wrong assumptions will be made about the data's + charset once it is in include - kw */ + + } else if (!StrNCmp(context->string->data, "![CDATA[", 8)) { + (*context->actions->put_block) (context->target, + context->string->data + 8, + context->string->size - 11); + + } + return; +} + +/* Handle processing instruction + * ----------------------------- + */ +static void handle_processing_instruction(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Processing instruction:\n<%s>\n", s)); + + if (!StrNCmp(s, "?xml ", 5)) { + int flag = context->T.decode_utf8; + + context->strict_xml = TRUE; + /* + * Switch to UTF-8 if the encoding is explicitly "utf-8". + */ + if (!flag) { + char *t = strstr(s, "encoding="); + + if (t != 0) { + t += 9; + if (*t == '"') + ++t; + flag = !StrNCmp(t, "utf-8", 5); + } + if (flag) { + CTRACE((tfp, "...Use UTF-8 for XML\n")); + context->T.decode_utf8 = TRUE; + } + } + } + + return; +} + +/* Handle sgmlent + * -------------- + */ +static void handle_sgmlent(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Entity Declaration:\n<%s>\n", s)); + + return; +} + +/* Handle sgmlent + * -------------- + */ +static void handle_sgmlele(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Element Declaration:\n<%s>\n", s)); + + return; +} + +/* Handle sgmlatt + * -------------- + */ +static void handle_sgmlatt(HTStream *context) +{ + const char *s = context->string->data; + + CTRACE((tfp, "SGML Attribute Declaration:\n<%s>\n", s)); + + return; +} + +/* + * Convenience macros - tags (elements) are identified sometimes by an int or + * enum value ('TAGNUM'), sometimes by a pointer to HTTag ('TAGP'). - kw + */ +#define TAGNUM_OF_TAGP(t) (HTMLElement) (t - context->dtd->tags) +#define TAGP_OF_TAGNUM(e) (context->dtd->tags + e) + +/* + * The following implement special knowledge about OBJECT. 
As long as + * HTML_OBJECT is the only tag for which an alternative variant exist, they can + * be simple macros. - kw + */ +/* does 'TAGNUM' e have an alternative (variant) parsing mode? */ +#define HAS_ALT_TAGNUM(e) (e == HTML_OBJECT) + +/* return 'TAGNUM' of the alternative mode for 'TAGNUM' e, if any. */ +#define ALT_TAGNUM(e) ((e == HTML_OBJECT) ? HTML_ALT_OBJECT : e) + +/* return 'TAGNUM' of the normal mode for 'TAGNUM' e which may be alt. */ +#define NORMAL_TAGNUM(e) (((int)(e) >= HTML_ELEMENTS) ? HTML_OBJECT : (HTMLElement)e) + +/* More convenience stuff. - kw */ +#define ALT_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(ALT_TAGNUM(e)) +#define NORMAL_TAGP_OF_TAGNUM(e) TAGP_OF_TAGNUM(NORMAL_TAGNUM(e)) + +#define ALT_TAGP(t) ALT_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) +#define NORMAL_TAGP(t) NORMAL_TAGP_OF_TAGNUM(TAGNUM_OF_TAGP(t)) + +static BOOL element_valid_within(HTTag * new_tag, HTTag * stacked_tag, int direct) +{ + BOOL result = YES; + TagClass usecontains, usecontained; + + if (stacked_tag && new_tag) { + usecontains = (direct ? stacked_tag->contains : stacked_tag->icontains); + usecontained = (direct ? new_tag->contained : new_tag->icontained); + if (new_tag == stacked_tag) { + result = (BOOL) ((Tgc_same & usecontains) && + (Tgc_same & usecontained)); + } else { + result = (BOOL) ((new_tag->tagclass & usecontains) && + (stacked_tag->tagclass & usecontained)); + } + } + return result; +} + +typedef enum { + close_NO = 0, + close_error = 1, + close_valid = 2 +} canclose_t; + +static canclose_t can_close(HTTag * new_tag, HTTag * stacked_tag) +{ + canclose_t result; + + if (!stacked_tag) { + result = close_NO; + } else if (stacked_tag->flags & Tgf_endO) { + result = close_valid; + } else if (new_tag == stacked_tag) { + result = ((Tgc_same & new_tag->canclose) + ? close_error + : close_NO); + } else { + result = ((stacked_tag->tagclass & new_tag->canclose) + ? 
close_error + : close_NO); + } + return result; +} + +static void do_close_stacked(HTStream *context) +{ + HTElement *stacked = context->element_stack; + HTMLElement e; + + if (!stacked) + return; /* stack was empty */ + if (context->inSELECT && !strcasecomp(stacked->tag->name, "SELECT")) { + context->inSELECT = FALSE; + } + e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(stacked->tag)); +#ifdef USE_PRETTYSRC + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ +#endif + (*context->actions->end_element) (context->target, + (int) e, + &context->include); + context->element_stack = stacked->next; + pool_free(stacked); + context->no_lynx_specialcodes = + (BOOL) (context->element_stack + ? (context->element_stack->tag->flags & Tgf_nolyspcl) + : NO); +} + +static int is_on_stack(HTStream *context, HTTag * old_tag) +{ + HTElement *stacked = context->element_stack; + int i = 1; + + for (; stacked; stacked = stacked->next, i++) { + if (stacked->tag == old_tag || + stacked->tag == ALT_TAGP(old_tag)) + return i; + } + return 0; +} + +/* End element + * ----------- + */ +static void end_element(HTStream *context, HTTag * old_tag) +{ + BOOL extra_action_taken = NO; + canclose_t canclose_check = close_valid; + int stackpos = is_on_stack(context, old_tag); + + if (!Old_DTD) { + while (canclose_check != close_NO && + context->element_stack && + (stackpos > 1 || (!extra_action_taken && stackpos == 0))) { + if (stackpos == 0 && (old_tag->flags & Tgf_startO) && + element_valid_within(old_tag, context->element_stack->tag, YES)) { + CTRACE((tfp, "SGML: </%s> ignored\n", old_tag->name)); + return; + } + canclose_check = can_close(old_tag, context->element_stack->tag); + if (canclose_check != close_NO) { + CTRACE((tfp, "SGML: End </%s> \t<- %s end </%s>\n", + context->element_stack->tag->name, + ((canclose_check == close_valid) + ? 
"supplied," + : "***forced by"), + old_tag->name)); + do_close_stacked(context); + extra_action_taken = YES; + stackpos = is_on_stack(context, old_tag); + } + } + + if (stackpos == 0 && old_tag->contents != SGML_EMPTY) { + CTRACE((tfp, "SGML: Still open %s, ***no open %s for </%s>\n", + context->element_stack ? + context->element_stack->tag->name : "none", + old_tag->name, + old_tag->name)); + return; + } + if (stackpos > 1) { + CTRACE((tfp, + "SGML: Nesting <%s>...<%s> \t<- ***invalid end </%s>\n", + old_tag->name, + context->element_stack ? + context->element_stack->tag->name : "none", + old_tag->name)); + return; + } + } + /* Now let the non-extended code deal with the rest. - kw */ + + /* + * If we are in a SELECT block, ignore anything but a SELECT end tag. - FM + */ + if (context->inSELECT) { + if (!strcasecomp(old_tag->name, "SELECT")) { + /* + * Turn off the inSELECT flag and fall through. - FM + */ + context->inSELECT = FALSE; + } else { + /* + * Ignore the end tag. - FM + */ + CTRACE((tfp, "SGML: ***Ignoring end tag </%s> in SELECT block.\n", + old_tag->name)); + return; + } + } + /* + * Handle the end tag. - FM + */ + CTRACE((tfp, "SGML: End </%s>\n", old_tag->name)); + if (old_tag->contents == SGML_EMPTY) { + CTRACE((tfp, "SGML: ***Illegal end tag </%s> found.\n", + old_tag->name)); + return; + } +#ifdef WIND_DOWN_STACK + while (context->element_stack) /* Loop is error path only */ +#else + if (context->element_stack) /* Substitute and remove one stack element */ +#endif /* WIND_DOWN_STACK */ + { + int status = HT_OK; + HTMLElement e; + HTElement *N = context->element_stack; + HTTag *t = (N->tag != old_tag) ? NORMAL_TAGP(N->tag) : N->tag; + + if (old_tag != t) { /* Mismatch: syntax error */ + if (context->element_stack->next) { /* This is not the last level */ + CTRACE((tfp, + "SGML: Found </%s> when expecting </%s>. 
</%s> ***assumed.\n", + old_tag->name, t->name, t->name)); + } else { /* last level */ + CTRACE((tfp, + "SGML: Found </%s> when expecting </%s>. </%s> ***Ignored.\n", + old_tag->name, t->name, old_tag->name)); + return; /* Ignore */ + } + } + + e = NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)); + CTRACE2(TRACE_SGML, (tfp, "tagnum(%p) = %d\n", (void *) t, (int) e)); +#ifdef USE_PRETTYSRC + if (!psrc_view) /* Don't actually pass call on if viewing psrc - kw */ +#endif + status = (*context->actions->end_element) (context->target, + (int) e, + &context->include); + if (status == HT_PARSER_REOPEN_ELT) { + CTRACE((tfp, "SGML: Restart <%s>\n", t->name)); + (*context->actions->start_element) (context->target, + (int) e, + NULL, + NULL, + context->current_tag_charset, + &context->include); + } else if (status == HT_PARSER_OTHER_CONTENT) { + CTRACE((tfp, "SGML: Continue with other content model for <%s>\n", t->name)); + context->element_stack->tag = ALT_TAGP_OF_TAGNUM(e); + } else { + context->element_stack = N->next; /* Remove from stack */ + pool_free(N); + } + context->no_lynx_specialcodes = + (BOOL) (context->element_stack + ? 
(context->element_stack->tag->flags & Tgf_nolyspcl) + : NO); +#ifdef WIND_DOWN_STACK + if (old_tag == t) + return; /* Correct sequence */ +#else + return; +#endif /* WIND_DOWN_STACK */ + + /* Syntax error path only */ + + } + CTRACE((tfp, "SGML: Extra end tag </%s> found and ignored.\n", + old_tag->name)); +} + +/* Start a element +*/ +static void start_element(HTStream *context) +{ + int status; + HTTag *new_tag = context->current_tag; + HTMLElement e = TAGNUM_OF_TAGP(new_tag); + BOOL ok = FALSE; + + BOOL valid = YES; + BOOL direct_container = YES; + BOOL extra_action_taken = NO; + canclose_t canclose_check = close_valid; + + if (!Old_DTD) { + while (context->element_stack && + (canclose_check == close_valid || + (canclose_check == close_error && + new_tag == context->element_stack->tag)) && + !(valid = element_valid_within(new_tag, + context->element_stack->tag, + direct_container))) { + canclose_check = can_close(new_tag, context->element_stack->tag); + if (canclose_check != close_NO) { + CTRACE((tfp, "SGML: End </%s> \t<- %s start <%s>\n", + context->element_stack->tag->name, + ((canclose_check == close_valid) + ? 
"supplied," + : "***forced by"), + new_tag->name)); + do_close_stacked(context); + extra_action_taken = YES; + if (canclose_check == close_error) + direct_container = NO; + } else { + CTRACE((tfp, + "SGML: Still open %s \t<- ***invalid start <%s>\n", + context->element_stack->tag->name, + new_tag->name)); + } + } + if (context->element_stack && !valid && + (context->element_stack->tag->flags & Tgf_strict) && + !(valid = element_valid_within(new_tag, + context->element_stack->tag, + direct_container))) { + CTRACE((tfp, "SGML: Still open %s \t<- ***ignoring start <%s>\n", + context->element_stack->tag->name, + new_tag->name)); + return; + } + + if (context->element_stack && + !extra_action_taken && + (canclose_check == close_NO) && + !valid && (new_tag->flags & Tgf_mafse)) { + BOOL has_attributes = NO; + int i = 0; + + for (; i < new_tag->number_of_attributes && !has_attributes; i++) + has_attributes = context->present[i]; + if (!has_attributes) { + CTRACE((tfp, + "SGML: Still open %s, ***converting invalid <%s> to </%s>\n", + context->element_stack->tag->name, + new_tag->name, + new_tag->name)); + end_element(context, new_tag); + return; + } + } + + if (context->element_stack && + (canclose_check == close_error) && + !element_valid_within(new_tag, + context->element_stack->tag, + direct_container)) { + CTRACE((tfp, "SGML: Still open %s \t<- ***invalid start <%s>\n", + context->element_stack->tag->name, + new_tag->name)); + } + } + /* Fall through to the non-extended code - kw */ + + /* + * If we are not in a SELECT block, check if this is a SELECT start tag. + * Otherwise (i.e., we are in a SELECT block) accept only OPTION as valid, + * terminate the SELECT block if it is any other form-related element, and + * otherwise ignore it. - FM + */ + if (!context->inSELECT) { + /* + * We are not in a SELECT block, so check if this starts one. - FM + * (frequent case!) 
+ */ + /* my_casecomp() - optimized by the first character */ + if (!my_casecomp(new_tag->name, "SELECT")) { + /* + * Set the inSELECT flag and fall through. - FM + */ + context->inSELECT = TRUE; + } + } else { + /* + * We are in a SELECT block. - FM + */ + if (strcasecomp(new_tag->name, "OPTION")) { + /* + * Ugh, it is not an OPTION. - FM + */ + switch (e) { + case HTML_INPUT: + case HTML_TEXTAREA: + case HTML_SELECT: + case HTML_BUTTON: + case HTML_FIELDSET: + case HTML_LABEL: + case HTML_LEGEND: + case HTML_FORM: + ok = TRUE; + break; + default: + break; + } + if (ok) { + /* + * It is another form-related start tag, so terminate the + * current SELECT block and fall through. - FM + */ + CTRACE((tfp, + "SGML: ***Faking SELECT end tag before <%s> start tag.\n", + new_tag->name)); + end_element(context, SGMLFindTag(context->dtd, "SELECT")); + } else { + /* + * Ignore the start tag. - FM + */ + CTRACE((tfp, + "SGML: ***Ignoring start tag <%s> in SELECT block.\n", + new_tag->name)); + return; + } + } + } + /* + * Handle the start tag. - FM + */ + CTRACE((tfp, "SGML: Start <%s>\n", new_tag->name)); + status = (*context->actions->start_element) (context->target, + (int) TAGNUM_OF_TAGP(new_tag), + context->present, + (STRING2PTR) context->value, /* coerce type for think c */ + context->current_tag_charset, + &context->include); + if (status == HT_PARSER_OTHER_CONTENT) + new_tag = ALT_TAGP(new_tag); /* this is only returned for OBJECT */ + if (new_tag->contents != SGML_EMPTY) { /* i.e., tag not empty */ + HTElement *N = pool_alloc(); + + if (N == NULL) + outofmem(__FILE__, "start_element"); + + assert(N != NULL); + + N->next = context->element_stack; + N->tag = new_tag; + context->element_stack = N; + context->no_lynx_specialcodes = (BOOLEAN) (new_tag->flags & Tgf_nolyspcl); + + } else if (e == HTML_META) { + /* + * Check for result of META tag. 
- KW & FM + */ + change_chartrans_handling(context); + } +} + +/* Find Tag in DTD tag list + * ------------------------ + * + * On entry, + * dtd points to dtd structure including valid tag list + * string points to name of tag in question + * + * On exit, + * returns: + * NULL tag not found + * else address of tag structure in dtd + */ +HTTag *SGMLFindTag(const SGML_dtd * dtd, + const char *s) +{ + int high, low, i, diff; + static HTTag *last[64] = + {NULL}; /*optimize using the previous results */ + HTTag **res = last + (UCH(*s) % 64); /*pointer arithmetic */ + + if (*res) { + if ((*res)->name == NULL) + return NULL; + if (!strcasecomp((*res)->name, s)) + return *res; + } + + for (low = 0, high = dtd->number_of_tags; + high > low; + diff < 0 ? (low = i + 1) : (high = i)) { /* Binary search */ + i = (low + (high - low) / 2); + /* my_casecomp() - optimized by the first character, NOT_ASCII ok */ + diff = my_casecomp(dtd->tags[i].name, s); /* Case insensitive */ + if (diff == 0) { /* success: found it */ + *res = &dtd->tags[i]; + return *res; + } + } + if (IsNmStart(*s)) { + /* + * Unrecognized, but may be valid. - KW + */ + return &HTTag_unrecognized; + } + return NULL; +} + +/*________________________________________________________________________ + * Public Methods + */ + +/* Could check that we are back to bottom of stack! @@ */ +/* Do check! - FM */ +/* */ +static void SGML_free(HTStream *context) +{ + int i; + HTElement *cur; + HTTag *t; + + /* + * Free the buffers. - FM + */ + FREE(context->recover); + FREE(context->url); + FREE(context->csi); + FREE(context->include); + FREE(context->active_include); + + /* + * Wind down stack if any elements are open. 
- FM + */ + while (context->element_stack) { + cur = context->element_stack; + t = cur->tag; + context->element_stack = cur->next; /* Remove from stack */ + pool_free(cur); +#ifdef USE_PRETTYSRC + if (!psrc_view) /* Don't actually call on target if viewing psrc - kw */ +#endif + (*context->actions->end_element) + (context->target, + (int) NORMAL_TAGNUM(TAGNUM_OF_TAGP(t)), + &context->include); + FREE(context->include); + } + + /* + * Finish off the target. - FM + */ + (*context->actions->_free) (context->target); + + /* + * Free the strings and context structure. - FM + */ + HTChunkFree(context->string); + for (i = 0; i < MAX_ATTRIBUTES; i++) + FREE_extra(context->value[i]); + FREE(context); + +#ifdef USE_PRETTYSRC + sgml_in_psrc_was_initialized = FALSE; +#endif +} + +static void SGML_abort(HTStream *context, HTError e) +{ + int i; + HTElement *cur; + + /* + * Abort the target. - FM + */ + (*context->actions->_abort) (context->target, e); + + /* + * Free the buffers. - FM + */ + FREE(context->recover); + FREE(context->include); + FREE(context->active_include); + FREE(context->url); + FREE(context->csi); + + /* + * Free stack memory if any elements were left open. - KW + */ + while (context->element_stack) { + cur = context->element_stack; + context->element_stack = cur->next; /* Remove from stack */ + pool_free(cur); + } + + /* + * Free the strings and context structure. - FM + */ + HTChunkFree(context->string); + for (i = 0; i < MAX_ATTRIBUTES; i++) + FREE_extra(context->value[i]); + FREE(context); + +#ifdef USE_PRETTYSRC + sgml_in_psrc_was_initialized = FALSE; +#endif +} + +/* Read and write user callback handle + * ----------------------------------- + * + * The callbacks from the SGML parser have an SGML context parameter. + * These calls allow the caller to associate his own context with a + * particular SGML context. 
+ */ + +#ifdef CALLERDATA +void *SGML_callerData(HTStream *context) +{ + return context->callerData; +} + +void SGML_setCallerData(HTStream *context, void *data) +{ + context->callerData = data; +} +#endif /* CALLERDATA */ + +#ifdef USE_PRETTYSRC +static void transform_tag(HTStream *context, HTChunk *string) +{ + if (!context->strict_xml) { + if (tagname_transform != 1) { + if (tagname_transform == 0) + LYLowerCase(string->data); + else + LYUpperCase(string->data); + } + } +} +#endif /* USE_PRETTYSRC */ + +static BOOL ignore_when_empty(HTTag * tag) +{ + BOOL result = FALSE; + + if (!LYPreparsedSource + && LYxhtml_parsing + && tag->name != 0 + && !(tag->flags & Tgf_mafse) + && tag->contents != SGML_EMPTY + && tag->tagclass != Tgc_Plike + && (tag->tagclass == Tgc_SELECTlike + || (tag->contains && tag->icontains))) { + result = TRUE; + } + CTRACE((tfp, "SGML Do%s ignore_when_empty:%s\n", + result ? "" : " not", + NonNull(tag->name))); + return result; +} + +static void discard_empty(HTStream *context) +{ + static HTTag empty_tag; + + CTRACE((tfp, "SGML discarding empty %s\n", + NonNull(context->current_tag->name))); + CTRACE_FLUSH(tfp); + + memset(&empty_tag, 0, sizeof(empty_tag)); + context->current_tag = &empty_tag; + context->string->size = 0; + + /* do not call end_element() if start_element() was not called */ +} + +#ifdef USE_PRETTYSRC +static BOOL end_if_prettysrc(HTStream *context, HTChunk *string, int end_ch) +{ + BOOL result = psrc_view; + + if (psrc_view) { + if (attr_is_name) { + HTStartAnchor(context->target, string->data, NULL); + (*context->actions->end_element) (context->target, + HTML_A, + &context->include); + } else if (attr_is_href) { + PSRCSTART(href); + HTStartAnchor(context->target, NULL, string->data); + } + PUTS_TR(string->data); + if (attr_is_href) { + (*context->actions->end_element) (context->target, + HTML_A, + &context->include); + PSRCSTOP(href); + } + if (end_ch) + PUTC(end_ch); + PSRCSTOP(attrval); + } + return result; +} +#endif + 
+static void SGML_character(HTStream *context, int c_in) +{ + const SGML_dtd *dtd = context->dtd; + HTChunk *string = context->string; + const char *EntityName; + HTTag *testtag = NULL; + BOOLEAN chk; /* Helps (?) walk through all the else ifs... */ + UCode_t clong, uck = 0; /* Enough bits for UCS4 ... */ + int testlast; + + unsigned char c; + unsigned char saved_char_in = '\0'; + + ++sgml_offset; + + /* + * Now some fun with the preprocessor. Use copies for c and unsign_c == + * clong, so that we can revert back to the unchanged c_in. - KW + */ +#define unsign_c clong + + c = UCH(c_in); + clong = UCH(c); /* a.k.a. unsign_c */ + + if (context->T.decode_utf8) { + /* + * Combine UTF-8 into Unicode. Incomplete characters silently ignored. + * From Linux kernel's console.c. - KW + */ + if (TOASCII(UCH(c)) > 127) { /* S/390 -- gil -- 0710 */ + /* + * We have an octet from a multibyte character. - FM + */ + if (context->utf_count > 0 && (TOASCII(c) & 0xc0) == 0x80) { + context->utf_char = (context->utf_char << 6) | (TOASCII(c) & 0x3f); + context->utf_count--; + *(context->utf_buf_p) = (char) c; + (context->utf_buf_p)++; + if (context->utf_count == 0) { + /* + * We have all of the bytes, so terminate the buffer and + * set 'clong' to the UCode_t value. - FM + */ + *(context->utf_buf_p) = '\0'; + clong = context->utf_char; + if (clong < 256) { + c = UCH(clong & 0xff); + } + /* lynx does not use left-to-right */ + if (clong == 0x200e) + return; + goto top1; + } else { + /* + * Wait for more. - KW + */ + return; + } + } else { + /* + * Start handling a new multibyte character. 
- FM + */ + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = (char) c; + (context->utf_buf_p)++; + if ((c & 0xe0) == 0xc0) { + context->utf_count = 1; + context->utf_char = (c & 0x1f); + } else if ((c & 0xf0) == 0xe0) { + context->utf_count = 2; + context->utf_char = (c & 0x0f); + } else if ((c & 0xf8) == 0xf0) { + context->utf_count = 3; + context->utf_char = (c & 0x07); + } else if ((c & 0xfc) == 0xf8) { + context->utf_count = 4; + context->utf_char = (c & 0x03); + } else if ((c & 0xfe) == 0xfc) { + context->utf_count = 5; + context->utf_char = (c & 0x01); + } else { + /* + * Garbage. - KW + */ + context->utf_count = 0; + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } + /* + * Wait for more. - KW + */ + return; + } + } else { + /* + * Got an ASCII char. - KW + */ + context->utf_count = 0; + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + /* goto top; */ + } + } + /* end of context->T.decode_utf8 S/390 -- gil -- 0726 */ +#ifdef NOTDEFINED + /* + * If we have a koi8-r input and do not have koi8-r as the output, save the + * raw input in saved_char_in before we potentially convert it to Unicode. + * - FM + */ + if (context->T.strip_raw_char_in) + saved_char_in = c; +#endif /* NOTDEFINED */ + + /* + * If we want the raw input converted to Unicode, try that now. 
- FM + */ + if (context->T.trans_to_uni && +#ifdef EXP_JAPANESEUTF8_SUPPORT + ((strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "euc-jp") == 0) || + (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0))) { + if (strcmp(LYCharSet_UC[context->inUCLYhndl].MIMEname, "shift_jis") == 0) { + if (context->utf_count == 0) { + if (IS_SJIS_HI1((unsigned char) c) || + IS_SJIS_HI2((unsigned char) c)) { + context->utf_buf[0] = (char) c; + context->utf_count = 1; + clong = -11; + } + } else { + if (IS_SJIS_LO((unsigned char) c)) { + context->utf_buf[1] = (char) c; + clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl); + } + context->utf_count = 0; + } + } else { + if (context->utf_count == 0) { + if (IS_EUC_HI((unsigned char) c)) { + context->utf_buf[0] = (char) c; + context->utf_count = 1; + clong = -11; + } + } else { + if (IS_EUC_LOX((unsigned char) c)) { + context->utf_buf[1] = (char) c; + clong = UCTransJPToUni(context->utf_buf, 2, context->inUCLYhndl); + } + context->utf_count = 0; + } + } + goto top1; + } else if (context->T.trans_to_uni && +#endif + ((TOASCII(unsign_c) >= LYlowest_eightbit[context->inUCLYhndl]) || /* S/390 -- gil -- 0744 */ + (unsign_c < ' ' && unsign_c != 0 && + context->T.trans_C0_to_uni))) { + /* + * Convert the octet to Unicode. - FM + */ + clong = UCTransToUni((char) c, context->inUCLYhndl); + if (clong > 0) { + saved_char_in = c; + if (clong < 256) { + c = FROMASCII(UCH(clong)); + } + } + goto top1; + } else if (unsign_c < ' ' && unsign_c != 0 && /* S/390 -- gil -- 0768 */ + context->T.trans_C0_to_uni) { + /* + * This else if may be too ugly to keep. 
- KW + */ + if (context->T.trans_from_uni && + (((clong = UCTransToUni((char) c, context->inUCLYhndl)) >= ' ') || + (context->T.transp && + (clong = UCTransToUni((char) c, context->inUCLYhndl)) > 0))) { + saved_char_in = c; + if (clong < 256) { + c = FROMASCII(UCH(clong)); + } + goto top1; + } else { + uck = -1; + if (context->T.transp) { + uck = UCTransCharStr(replace_buf, 60, (char) c, + context->inUCLYhndl, + context->inUCLYhndl, NO); + } + if (!context->T.transp || uck < 0) { + uck = UCTransCharStr(replace_buf, 60, (char) c, + context->inUCLYhndl, + context->outUCLYhndl, YES); + } + if (uck == 0) { + return; + } else if (uck < 0) { + goto top0a; + } + c = UCH(replace_buf[0]); + if (c && replace_buf[1]) { + if (context->state == S_text) { + PUTS(replace_buf); + return; + } + StrAllocCat(context->recover, replace_buf + 1); + } + goto top0a; + } /* Next line end of ugly stuff for C0. - KW */ + } else { /* end of context->T.trans_to_uni S/390 -- gil -- 0791 */ + goto top0a; + } + + /* + * At this point we have either unsign_c a.k.a. clong in Unicode (and c in + * latin1 if clong is in the latin1 range), or unsign_c and c will have to + * be passed raw. - KW + */ +/* + * We jump up to here from below if we have + * stuff in the recover, insert, or csi buffers + * to process. We zero saved_char_in, in effect + * as a flag that the octet is not that of the + * actual call to this function. This may be OK + * for now, for the stuff this function adds to + * its recover buffer, but it might not be for + * stuff other functions added to the insert or + * csi buffer, so bear that in mind. - FM + * Stuff from the recover buffer is now handled + * as UTF-8 if we can expect that's what it is, + * and in that case we don't come back up here. 
- kw + */ + top: + saved_char_in = '\0'; +/* + * We jump to here from above when we don't have + * UTF-8 input, haven't converted to Unicode, and + * want clong set to the input octet (unsigned) + * without zeroing its saved_char_in copy (which + * is signed). - FM + */ + top0a: + *(context->utf_buf) = '\0'; + clong = UCH(c); +/* + * We jump to here from above if we have converted + * the input, or a multibyte sequence across calls, + * to a Unicode value and loaded it into clong (to + * which unsign_c has been defined), and from below + * when we are recycling a character (e.g., because + * it terminated an entity but is not the standard + * semi-colon). The character will already have + * been put through the Unicode conversions. - FM + */ + top1: + /* + * Ignore low ISO 646 7-bit control characters if HTCJK is not set. - FM + */ + /* + * Works for both ASCII and EBCDIC. -- gil + * S/390 -- gil -- 0811 + */ + if (TOASCII(unsign_c) < 32 && + c != '\t' && c != '\n' && c != '\r' && + !IS_CJK_TTY) + goto after_switch; + + /* + * Ignore 127 if we don't have HTPassHighCtrlRaw or HTCJK set. - FM + */ +#define PASSHICTRL (context->T.transp || \ + unsign_c >= LYlowest_eightbit[context->inUCLYhndl]) + if (TOASCII(c) == 127 && /* S/390 -- gil -- 0830 */ + !(PASSHICTRL || IS_CJK_TTY)) + goto after_switch; + + /* + * Ignore 8-bit control characters 128 - 159 if neither HTPassHighCtrlRaw + * nor HTCJK is set. 
- FM + */ + if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 && /* S/390 -- gil -- 0847 */ + !(PASSHICTRL || IS_CJK_TTY)) { + /* + * If we happen to be reading from an "ISO-8859-1" or "US-ASCII" + * document, allow the cp-1252 codes, to accommodate the HTML5 draft + * recommendation for replacement encoding: + * + * http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#character-encodings-0 + */ + if (AssumeCP1252(context)) { + clong = LYcp1252ToUnicode((UCode_t) c); + goto top1; + } + goto after_switch; + } + + /* Almost all CJK characters are double byte but only Japanese + * JIS X0201 Kana is single byte. To prevent to fail SGML parsing + * we have to take care of them here. -- TH + */ + if ((HTCJK == JAPANESE) && (context->state == S_in_kanji) && + !IS_JAPANESE_2BYTE(context->kanji_buf, UCH(c)) +#ifdef EXP_JAPANESEUTF8_SUPPORT + && !context->T.decode_utf8 +#endif + ) { +#ifdef CONV_JISX0201KANA_JISX0208KANA + if (IS_SJIS_X0201KANA(context->kanji_buf)) { + unsigned char sjis_hi, sjis_lo; + + JISx0201TO0208_SJIS(context->kanji_buf, &sjis_hi, &sjis_lo); + PUTC(sjis_hi); + PUTC(sjis_lo); + } else +#endif + PUTC(context->kanji_buf); + context->state = S_text; + } + + /* + * Handle character based on context->state. + */ + CTRACE2(TRACE_SGML, (tfp, "SGML before %s|%.*s|%c|\n", + state_name(context->state), + string->size, + NonNull(string->data), + UCH(c))); + switch (context->state) { + + case S_in_kanji: + /* + * Note that if we don't have a CJK input, then this is not the second + * byte of a CJK di-byte, and we're trashing the input. That's why + * 8-bit characters followed by, for example, '<' can cause the tag to + * be treated as text, not markup. We could try to deal with it by + * holding each first byte and then checking byte pairs, but that + * doesn't seem worth the overhead (see below). 
- FM + */ + context->state = S_text; + PUTC(context->kanji_buf); + PUTC(c); + break; + + case S_tagname_slash: + /* + * We had something link "<name/" so far, set state to S_text but keep + * context->slashedtag as a flag; except if we get '>' directly + * after the "<name/", and really have a tag for that name in + * context->slashedtag, in which case keep state as is and let code + * below deal with it. - kw + */ + if (!(c == '>' && context->slashedtag && TOASCII(unsign_c) < 127)) { + context->state = S_text; + } + /* fall through in any case! */ + case S_text: + if (IS_CJK_TTY && ((TOASCII(c) & 0200) != 0) +#ifdef EXP_JAPANESEUTF8_SUPPORT + && !context->T.decode_utf8 +#endif + ) { /* S/390 -- gil -- 0864 */ + /* + * Setting up for Kanji multibyte handling (based on Takuya ASADA's + * (asada@three-a.co.jp) CJK Lynx). Note that if the input is not + * in fact CJK, the next byte also will be mishandled, as explained + * above. Toggle raw mode off in such cases, or select the "7 bit + * approximations" display character set, which is largely + * equivalent to having raw mode off with CJK. - FM + */ + context->state = S_in_kanji; + context->kanji_buf = c; + break; + } else if (IS_CJK_TTY && TOASCII(c) == '\033') { /* S/390 -- gil -- 0881 */ + /* + * Setting up for CJK escape sequence handling (based on Takuya + * ASADA's (asada@three-a.co.jp) CJK Lynx). - FM + */ + context->state = S_esc; + PUTC(c); + break; + } + + if (c == '&' || c == '<') { +#ifdef USE_PRETTYSRC + if (psrc_view) { /*there is nothing useful in the element_stack */ + testtag = context->current_tag; + } else +#endif + { + testtag = context->element_stack ? 
+ context->element_stack->tag : NULL; + } + } + + if (c == '&' && TOASCII(unsign_c) < 127 && /* S/390 -- gil -- 0898 */ + (!testtag || + (testtag->contents == SGML_MIXED || + testtag->contents == SGML_ELEMENT || + testtag->contents == SGML_PCDATA || +#ifdef USE_PRETTYSRC + testtag->contents == SGML_EMPTY || +#endif + testtag->contents == SGML_RCDATA))) { + /* + * Setting up for possible entity, without the leading '&'. - FM + */ + string->size = 0; + context->state = S_ero; + } else if (c == '<' && TOASCII(unsign_c) < 127) { /* S/390 -- gil -- 0915 */ + /* + * Setting up for possible tag. - FM + */ + string->size = 0; + if (testtag && testtag->contents == SGML_PCDATA) { + context->state = S_pcdata; + } else if (testtag && (testtag->contents == SGML_LITTERAL + || testtag->contents == SGML_CDATA)) { + context->state = S_litteral; + } else if (testtag && (testtag->contents == SGML_SCRIPT)) { + context->state = S_script; + } else { + context->state = S_tag; + } + context->slashedtag = NULL; + } else if (context->slashedtag && + context->slashedtag->name && + (c == '/' || + (c == '>' && context->state == S_tagname_slash)) && + TOASCII(unsign_c) < 127) { + /* + * We got either the second slash of a pending "<NAME/blah blah/" + * shortref construct, or the '>' of a mere "<NAME/>". In both + * cases generate a "</NAME>" end tag in the recover buffer for + * reparsing unless NAME is really an empty element. 
- kw + */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC(c); + PSRCSTOP(abracket); + } else +#endif + if (context->slashedtag != context->unknown_tag && + !ReallyEmptyTag(context->slashedtag)) { + if (context->recover == NULL) { + StrAllocCopy(context->recover, "</"); + context->recover_index = 0; + } else { + StrAllocCat(context->recover, "</"); + } + StrAllocCat(context->recover, context->slashedtag->name); + StrAllocCat(context->recover, ">"); + } + context->slashedtag = NULL; + + } else if (context->element_stack && + (context->element_stack->tag->flags & Tgf_frecyc)) { + /* + * The element stack says we are within the contents of an element + * that the next stage (HTML.c) may want to feed us back again (via + * the *include string). So try to output text in UTF-8 if + * possible, using the same logic as for attribute values (which + * should be in line with what context->current_tag_charset + * indicates). - kw + */ + if (context->T.decode_utf8 && + *context->utf_buf) { + PUTS(context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (!IS_CJK_TTY && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (LYIsASCII(clong)) { + PUTC(c); + } else if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + PUTUTF8((UCode_t) (0xf000 | saved_char_in)); + } else { + PUTUTF8(clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + PUTC(saved_char_in); + } else { + PUTC(c); + } + +#define PASS8859SPECL context->T.pass_160_173_raw + /* + * Convert 160 (nbsp) to Lynx special character if neither + * HTPassHighCtrlRaw nor HTCJK is set. - FM + */ + } else if (unsign_c == CH_NBSP && /* S/390 -- gil -- 0932 */ + !context->no_lynx_specialcodes && + !(PASS8859SPECL || IS_CJK_TTY)) { + PUTC(HT_NON_BREAK_SPACE); + /* + * Convert 173 (shy) to Lynx special character if neither + * HTPassHighCtrlRaw nor HTCJK is set. 
- FM + */ + } else if (unsign_c == CH_SHY && /* S/390 -- gil -- 0949 */ + !context->no_lynx_specialcodes && + !(PASS8859SPECL || IS_CJK_TTY)) { + PUTC(LY_SOFT_HYPHEN); + /* + * Handle the case in which we think we have a character which + * doesn't need further processing (e.g., a koi8-r input for a + * koi8-r output). - FM + */ + } else if (context->T.use_raw_char_in && saved_char_in) { + /* + * Only if the original character is still in saved_char_in, + * otherwise we may be iterating from a goto top. - KW + */ + PUTC(saved_char_in); +/****************************************************************** + * I. LATIN-1 OR UCS2 TO DISPLAY CHARSET + ******************************************************************/ + } else if ((chk = (BOOL) (context->T.trans_from_uni && + TOASCII(unsign_c) >= 160)) && /* S/390 -- gil -- 0968 */ + (uck = UCTransUniChar(unsign_c, + context->outUCLYhndl)) >= ' ' && + uck < 256) { + CTRACE((tfp, "UCTransUniChar returned 0x%.2" PRI_UCode_t + ":'%c'.\n", + uck, FROMASCII((char)uck))); + /* + * We got one octet from the conversions, so use it. - FM + */ + PUTC(FROMASCII((char) uck)); + } else if ((chk && + (uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck < 32))) && + /* + * Not found; look for replacement string. - KW + */ + (uck = UCTransUniCharStr(replace_buf, 60, clong, + context->outUCLYhndl, + 0) >= 0)) { + /* + * Got a replacement string. No further tests for validity - + * assume that whoever defined replacement strings knew what she + * was doing. - KW + */ + PUTS(replace_buf); + /* + * If we're displaying UTF-8, try that now. - FM + */ + } else if (context->T.output_utf8 && PUTUTF8(clong)) { + ; /* do nothing more */ + /* + * If it's any other (> 160) 8-bit character, and we have not set + * HTPassEightBitRaw nor HTCJK, nor have the "ISO Latin 1" + * character set selected, back translate for our character set. 
- + * FM + */ +#define IncludesLatin1Enc \ + (context->outUCLYhndl == LATIN1 || \ + (context->outUCI && \ + (context->outUCI->enc & (UCT_CP_SUPERSETOF_LAT1)))) + +#define PASSHI8BIT (HTPassEightBitRaw || \ + (context->T.do_8bitraw && !context->T.trans_from_uni)) + + } else if (unsign_c > 160 && unsign_c < 256 && + !(PASSHI8BIT || IS_CJK_TTY) && + !IncludesLatin1Enc) { +#ifdef USE_PRETTYSRC + int psrc_view_backup = 0; +#endif + + string->size = 0; + EntityName = HTMLGetEntityName((UCode_t) (unsign_c - 160)); + HTChunkPuts(string, EntityName); + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + /* we need to disable it temporarily */ + if (psrc_view) { + psrc_view_backup = 1; + psrc_view = 0; + } +#endif + handle_entity(context, '\0'); +#ifdef USE_PRETTYSRC + /* we need to disable it temporarily */ + if (psrc_view_backup) + psrc_view = TRUE; +#endif + + string->size = 0; + if (!FoundEntity) + PUTC(';'); + /* + * If we get to here and have an ASCII char, pass the character. - + * KW + */ + } else if (TOASCII(unsign_c) < 127 && unsign_c > 0) { /* S/390 -- gil -- 0987 */ + PUTC(c); + /* + * If we get to here, and should have translated, translation has + * failed so far. - KW + * + * We should have sent UTF-8 output to the parser already, but what + * the heck, try again. - FM + */ + } else if (context->T.output_utf8 && *context->utf_buf) { + PUTS(context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; +#ifdef NOTDEFINED + /* + * Check for a strippable koi8-r 8-bit character. - FM + */ + } else if (context->T.strip_raw_char_in && saved_char_in && + (saved_char_in >= 0xc0) && + (saved_char_in < 255)) { + /* + * KOI8 special: strip high bit, gives (somewhat) readable ASCII + * or KOI7 - it was constructed that way! - KW + */ + PUTC((saved_char_in & 0x7f)); + saved_char_in = '\0'; +#endif /* NOTDEFINED */ + /* + * If we don't actually want the character, make it safe and output + * that now. 
- FM + */ + } else if (TOASCII(UCH(c)) < /* S/390 -- gil -- 0997 */ + LYlowest_eightbit[context->outUCLYhndl] || + (context->T.trans_from_uni && !HTPassEightBitRaw)) { + /* + * If we get to here, pass the character. - FM + */ + } else { + PUTC(c); + } + break; + + /* + * Found '<' in SGML_PCDATA content; treat this mode nearly like + * S_litteral, but recognize '<!' and '<?' to filter out comments and + * processing instructions. - kw + */ + case S_pcdata: + if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ + if (c == '!') { /* <! */ + /* + * Terminate and set up for possible comment, identifier, + * declaration, or marked section as under S_tag. - kw + */ + context->state = S_exclamation; + context->lead_exclamation = TRUE; + context->doctype_bracket = FALSE; + context->first_bracket = FALSE; + HTChunkPutc(string, c); + break; + } else if (c == '?') { /* <? - ignore as a PI until '>' - kw */ + CTRACE((tfp, + "SGML: Found PI in PCDATA, junking it until '>'\n")); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTS("<?"); + PSRCSTOP(abracket); + } +#endif + context->state = S_pi; + break; + } + } + goto case_S_litteral; + + /* + * Found '<' in SGML_SCRIPT content; treat this mode nearly like + * S_litteral, but recognize '<!' to allow the content to be treated as + * a comment by lynx. + */ + case S_script: + if (!string->size && TOASCII(unsign_c) < 127) { /* first after '<' */ + if (c == '!') { /* <! */ + /* + * Terminate and set up for possible comment, identifier, + * declaration, or marked section as under S_tag. - kw + */ + context->state = S_exclamation; + context->lead_exclamation = TRUE; + context->doctype_bracket = FALSE; + context->first_bracket = FALSE; + HTChunkPutc(string, c); + break; + } + } + goto case_S_litteral; + + /* + * In litteral mode, waits only for specific end tag (for compatibility + * with old servers, and for Lynx). 
- FM + */ + case_S_litteral: + case S_litteral: + /*PSRC:this case not understood completely by HV, not done */ + HTChunkPutc(string, c); +#ifdef USE_PRETTYSRC + if (psrc_view) { + /* there is nothing useful in the element_stack */ + testtag = context->current_tag; + } else +#endif + testtag = (context->element_stack + ? context->element_stack->tag + : NULL); + + if (testtag == NULL || testtag->name == NULL) { + string->size--; + context->state = S_text; + goto top1; + } + + /* + * Normally when we get the closing ">", + * testtag contains something like "TITLE" + * string contains something like "/title>" + * so we decrement by 2 to compare the final character of each. + */ + testlast = string->size - 2 - context->trailing_spaces - context->leading_spaces; + + if (TOUPPER(c) != ((testlast < 0) + ? '/' + : testtag->name[testlast])) { + int i; + + /* + * If complete match, end litteral. + */ + if ((c == '>') && + testlast >= 0 && !testtag->name[testlast]) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + char *trailing = NULL; + + if (context->trailing_spaces) { + StrAllocCopy(trailing, + string->data + + string->size + - 1 + - context->trailing_spaces); + trailing[context->trailing_spaces] = '\0'; + } + + PSRCSTART(abracket); + PUTS("</"); + PSRCSTOP(abracket); + PSRCSTART(tag); + + strcpy(string->data, context->current_tag->name); + transform_tag(context, string); + PUTS(string->data); + + if (trailing) { + PUTS(trailing); + FREE(trailing); + } + + PSRCSTOP(tag); + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + + context->current_tag = NULL; + } else +#endif + end_element(context, context->element_stack->tag); + + string->size = 0; + context->current_attribute_number = INVALID; + context->state = S_text; + context->leading_spaces = 0; + context->trailing_spaces = 0; + break; + } + + /* + * Allow whitespace between the "<" or ">" and the keyword, for + * error-recovery. 
+ */ + if (isspace(UCH(c))) { + if (testlast == -1) { + context->leading_spaces += 1; + CTRACE2(TRACE_SGML, (tfp, "leading spaces: %d\n", context->leading_spaces)); + break; + } else if (testlast > 0) { + context->trailing_spaces += 1; + CTRACE2(TRACE_SGML, (tfp, "trailing spaces: %d\n", context->trailing_spaces)); + break; + } + } + + /* + * Mismatch - recover. + */ + context->leading_spaces = 0; + context->trailing_spaces = 0; + if (((testtag->contents != SGML_LITTERAL && + (testtag->flags & Tgf_strict)) || + (context->state == S_pcdata && + (testtag->flags & (Tgf_strict | Tgf_endO)))) && + (testlast > -1 && + (c == '>' || testlast > 0 || IsNmStart(c)))) { + context->state = S_end; + string->size--; + for (i = 0; i < string->size; i++) /* remove '/' */ + string->data[i] = string->data[i + 1]; + if ((string->size == 1) ? IsNmStart(c) : IsNmChar(c)) + break; + string->size--; + goto top1; + } + if (context->state == S_pcdata && + (testtag->flags & (Tgf_strict | Tgf_endO)) && + (testlast < 0 && IsNmStart(c))) { + context->state = S_tag; + break; + } + /* + * If Mismatch: recover string literally. + */ + PUTC('<'); + for (i = 0; i < string->size - 1; i++) /* recover, except last c */ + PUTC(string->data[i]); + string->size = 0; + context->state = S_text; + goto top1; /* to recover last c */ + } + break; + + /* + * Character reference (numeric entity) or named entity. + */ + case S_ero: + if (c == '#') { + /* + * Setting up for possible numeric entity. + */ + context->state = S_cro; /* &# is Char Ref Open */ + break; + } + context->state = S_entity; /* Fall through! */ + + /* + * Handle possible named entity. + */ + case S_entity: + if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1029 */ + isalnum(UCH(c)) : isalpha(UCH(c)))) { + /* Should probably use IsNmStart/IsNmChar above (is that right?), + but the world is not ready for that - there's  : (note + colon!) and stuff around. */ + /* + * Accept valid ASCII character. 
- FM + */ + HTChunkPutc(string, c); + } else if (string->size == 0) { + /* + * It was an ampersand that's just text, so output the ampersand + * and recycle this character. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + PUTC('&'); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + context->state = S_text; + goto top1; + } else { + /* + * Terminate entity name and try to handle it. - FM + */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + entity_string = string->data; +#endif + /* S/390 -- gil -- 1039 */ + /* CTRACE((tfp, "%s: %d: %s\n", __FILE__, __LINE__, string->data)); */ + if (!strcmp(string->data, "zwnj") && + (!context->element_stack || + (context->element_stack->tag && + context->element_stack->tag->contents == SGML_MIXED))) { + /* + * Handle zwnj (8204) as <WBR>. - FM + */ + char temp[8]; + + CTRACE((tfp, + "SGML_character: Handling 'zwnj' entity as 'WBR' element.\n")); + + if (c != ';') { + sprintf(temp, "<WBR>%c", c); + } else { + sprintf(temp, "<WBR>"); + } + if (context->recover == NULL) { + StrAllocCopy(context->recover, temp); + context->recover_index = 0; + } else { + StrAllocCat(context->recover, temp); + } + string->size = 0; + context->state = S_text; + break; + } else { + handle_entity(context, '\0'); + } + string->size = 0; + context->state = S_text; + /* + * Don't eat the terminator if we didn't find the entity name and + * therefore sent the raw string via handle_entity(), or if the + * terminator is not the "standard" semi-colon for HTML. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view && FoundEntity && c == ';') { + PSRCSTART(entity); + PUTC(c); + PSRCSTOP(entity); + } +#endif + if (!FoundEntity || c != ';') + goto top1; + } + break; + + /* + * Check for a numeric entity. 
+ */ + case S_cro: + if (TOASCII(unsign_c) < 127 && TOLOWER(UCH(c)) == 'x') { /* S/390 -- gil -- 1060 */ + context->isHex = TRUE; + context->state = S_incro; + } else if (TOASCII(unsign_c) < 127 && isdigit(UCH(c))) { + /* + * Accept only valid ASCII digits. - FM + */ + HTChunkPutc(string, c); /* accumulate a character NUMBER */ + context->isHex = FALSE; + context->state = S_incro; + } else if (string->size == 0) { + /* + * No 'x' or digit following the "&#" so recover them and recycle + * the character. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + PUTC('&'); + PUTC('#'); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + context->state = S_text; + goto top1; + } + break; + + /* + * Handle a numeric entity. + */ + case S_incro: + /* S/390 -- gil -- 1075 */ + if ((TOASCII(unsign_c) < 127) && + (context->isHex + ? isxdigit(UCH(c)) + : isdigit(UCH(c)))) { + /* + * Accept only valid hex or ASCII digits. - FM + */ + HTChunkPutc(string, c); /* accumulate a character NUMBER */ + } else if (string->size == 0) { + /* + * No hex digit following the "&#x" so recover them and recycle the + * character. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + PUTS("&#x"); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + context->isHex = FALSE; + context->state = S_text; + goto top1; + } else { + /* + * Terminate the numeric entity and try to handle it. - FM + */ + UCode_t code; + int i; + + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + entity_string = string->data; +#endif + if (UCScanCode(&code, string->data, context->isHex)) { + +/* =============== work in ASCII below here =============== S/390 -- gil -- 1092 */ + if (AssumeCP1252(context)) { + code = LYcp1252ToUnicode(code); + } + /* + * Check for special values. 
- FM + */ + if ((code == 8204) && + (!context->element_stack || + (context->element_stack->tag && + context->element_stack->tag->contents == SGML_MIXED))) { + /* + * Handle zwnj (8204) as <WBR>. - FM + */ + char temp[8]; + + CTRACE((tfp, + "SGML_character: Handling '8204' (zwnj) reference as 'WBR' element.\n")); + + /* + * Include the terminator if it is not the standard + * semi-colon. - FM + */ + if (c != ';') { + sprintf(temp, "<WBR>%c", c); + } else { + sprintf(temp, "<WBR>"); + } + /* + * Add the replacement string to the recover buffer for + * processing. - FM + */ + if (context->recover == NULL) { + StrAllocCopy(context->recover, temp); + context->recover_index = 0; + } else { + StrAllocCat(context->recover, temp); + } + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + break; + } else if (put_special_unicodes(context, code)) { + /* + * We handled the value as a special character, so recycle + * the terminator or break. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(entity); + PUTS((context->isHex ? "&#x" : "&#")); + PUTS(entity_string); + if (c == ';') + PUTC(';'); + PSRCSTOP(entity); + } +#endif + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + if (c != ';') + goto top1; + break; + } + /* + * Seek a translation from the chartrans tables. + */ + if ((uck = UCTransUniChar(code, + context->outUCLYhndl)) >= 32 && + uck < 256 && + (uck < 127 || + uck >= LYlowest_eightbit[context->outUCLYhndl])) { +#ifdef USE_PRETTYSRC + if (!psrc_view) { +#endif + PUTC(FROMASCII((char) uck)); +#ifdef USE_PRETTYSRC + } else { + put_pretty_number(context); + } +#endif + } else if ((uck == -4 || + (context->T.repl_translated_C0 && + uck > 0 && uck < 32)) && + /* + * Not found; look for replacement string. 
+ */ + (uck = UCTransUniCharStr(replace_buf, 60, code, + context->outUCLYhndl, + 0) >= 0)) { +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_number(context); + } else +#endif + PUTS(replace_buf); + /* + * If we're displaying UTF-8, try that now. - FM + */ + } else if (context->T.output_utf8 && PUTUTF8(code)) { + ; /* do nothing more */ + /* + * Ignore 8205 (zwj), 8206 (lrm), and 8207 (rln), if we get + * to here. - FM + */ + } else if (code == 8205 || + code == 8206 || + code == 8207) { + if (TRACE) { + string->size--; + LYStrNCpy(replace_buf, + string->data, + (string->size < 64 ? string->size : 63)); + fprintf(tfp, + "SGML_character: Ignoring '%s%s'.\n", + (context->isHex ? "&#x" : "&#"), + replace_buf); + } +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(badseq); + PUTS((context->isHex ? "&#x" : "&#")); + PUTS(entity_string); + if (c == ';') + PUTC(';'); + PSRCSTOP(badseq); + } +#endif + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + if (c != ';') + goto top1; + break; + /* + * Show the numeric entity if we get to here and the value: + * (1) Is greater than 255 (but use ASCII characters for + * spaces or dashes). + * (2) Is less than 32, and not valid or we don't have + * HTCJK set. + * (3) Is 127 and we don't have HTPassHighCtrlRaw or HTCJK + * set. + * (4) Is 128 - 159 and we don't have HTPassHighCtrlNum + * set. + * - FM + */ + } else if ((code > 255) || + (code < ' ' && /* S/390 -- gil -- 1140 */ + code != '\t' && code != '\n' && code != '\r' && + !IS_CJK_TTY) || + (TOASCII(code) == 127 && + !(HTPassHighCtrlRaw || IS_CJK_TTY)) || + (TOASCII(code) > 127 && code < 160 && + !HTPassHighCtrlNum)) { + /* + * Unhandled or illegal value. Recover the "&#" or "&#x" + * and digit(s), and recycle the terminator. 
- FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(badseq); + } +#endif + if (context->isHex) { + PUTS("&#x"); + context->isHex = FALSE; + } else { + PUTS("&#"); + } + string->size--; + for (i = 0; i < string->size; i++) /* recover */ + PUTC(string->data[i]); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTOP(badseq); + } +#endif + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + goto top1; + } else if (TOASCII(code) < 161 || /* S/390 -- gil -- 1162 */ + HTPassEightBitNum || + IncludesLatin1Enc) { + /* + * No conversion needed. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + put_pretty_number(context); + } else +#endif + PUTC(FROMASCII((char) code)); + } else { + /* + * Handle as named entity. - FM + */ + code -= 160; + EntityName = HTMLGetEntityName(code); + if (EntityName && EntityName[0] != '\0') { + string->size = 0; + HTChunkPuts(string, EntityName); + HTChunkTerminate(string); + handle_entity(context, '\0'); + /* + * Add a semi-colon if something went wrong and + * handle_entity() sent the string. - FM + */ + if (!FoundEntity) { + PUTC(';'); + } + } else { + /* + * Our conversion failed, so recover the "&#" and + * digit(s), and recycle the terminator. - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + if (context->isHex) { + PUTS("&#x"); + context->isHex = FALSE; + } else { + PUTS("&#"); + } + string->size--; + for (i = 0; i < string->size; i++) /* recover */ + PUTC(string->data[i]); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + goto top1; + } + } + /* + * If we get to here, we succeeded. Hoorah!!! - FM + */ + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + /* + * Don't eat the terminator if it's not the "standard" + * semi-colon for HTML. 
- FM + */ + if (c != ';') { + goto top1; + } + } else { + /* + * Not an entity, and don't know why not, so add the terminator + * to the string, output the "&#" or "&#x", and process the + * string via the recover element. - FM + */ + string->size--; + HTChunkPutc(string, c); + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + if (context->isHex) { + PUTS("&#x"); + context->isHex = FALSE; + } else { + PUTS("&#"); + } +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + if (context->recover == NULL) { + StrAllocCopy(context->recover, string->data); + context->recover_index = 0; + } else { + StrAllocCat(context->recover, string->data); + } + string->size = 0; + context->isHex = FALSE; + context->state = S_text; + break; + } + } + break; + + /* + * Tag + */ + case S_tag: /* new tag */ + if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1179 */ + IsNmChar(c) : IsNmStart(c))) { + /* + * Add valid ASCII character. - FM + */ + HTChunkPutc(string, c); + } else if (c == '!' && !string->size) { /* <! */ + /* + * Terminate and set up for possible comment, identifier, + * declaration, or marked section. - FM + */ + context->state = S_exclamation; + context->lead_exclamation = TRUE; + context->doctype_bracket = FALSE; + context->first_bracket = FALSE; + HTChunkPutc(string, c); + break; + } else if (!string->size && + (TOASCII(unsign_c) <= 160 && /* S/390 -- gil -- 1196 */ + (c != '/' && c != '?' && c != '_' && c != ':'))) { + /* + * '<' must be followed by an ASCII letter to be a valid start tag. + * Here it isn't, nor do we have a '/' for an end tag, nor one of + * some other characters with a special meaning for SGML or which + * are likely to be legal Name Start characters in XML or some + * other extension. So recover the '<' and following character as + * data. 
- FM & KW + */ + context->state = S_text; +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + PUTC('<'); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + goto top1; + } else { /* End of tag name */ + /* + * Try to handle tag. - FM + */ + HTTag *t; + + if (c == '/') { + if (string->size == 0) { + context->state = S_end; + break; + } + CTRACE((tfp, "SGML: `<%.*s/' found!\n", string->size, string->data)); + } + HTChunkTerminate(string); + + t = SGMLFindTag(dtd, string->data); + if (t == context->unknown_tag && + ((c == ':' && + string->size == 4 && 0 == strcasecomp(string->data, "URL")) || + (string->size > 4 && 0 == strncasecomp(string->data, "URL:", 4)))) { + /* + * Treat <URL: as text rather than a junk tag, so we display + * it and the URL (Lynxism 8-). - FM + */ +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(badseq); +#endif + PUTC('<'); + PUTS(string->data); /* recover */ + PUTC(c); +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTOP(badseq); +#endif + CTRACE((tfp, "SGML: Treating <%s%c as text\n", + string->data, c)); + string->size = 0; + context->state = S_text; + break; + } + if (c == '/' && t) { + /* + * Element name was ended by '/'. Remember the tag that ended + * thusly, we'll interpret this as either an indication of an + * empty element (if '>' follows directly) or do some + * SGMLshortref-ish treatment. - kw + */ + context->slashedtag = t; + } + if (!t) { + if (c == '?' 
&& string->size <= 1) { + CTRACE((tfp, "SGML: Found PI, looking for '>'\n")); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTS("<?"); + PSRCSTOP(abracket); + } +#endif + string->size = 0; + context->state = S_pi; + HTChunkPutc(string, c); + break; + } + CTRACE((tfp, "SGML: *** Invalid element %s\n", + string->data)); + +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC('<'); + PSRCSTOP(abracket); + PSRCSTART(badtag); + transform_tag(context, string); + PUTS(string->data); + if (c == '>') { + PSRCSTOP(badtag); + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } else { + PUTC(c); + } + } +#endif + context->state = (c == '>') ? S_text : S_junk_tag; + break; + } else if (t == context->unknown_tag) { + CTRACE((tfp, "SGML: *** Unknown element \"%s\"\n", + string->data)); + /* + * Fall through and treat like valid tag for attribute parsing. + * - KW + */ + + } + context->current_tag = t; + +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC('<'); + PSRCSTOP(abracket); + if (t != context->unknown_tag) + PSRCSTART(tag); + else + PSRCSTART(badtag); + transform_tag(context, string); + PUTS(string->data); + if (t != context->unknown_tag) + PSRCSTOP(tag); + else + PSRCSTOP(badtag); + } + if (!psrc_view) /*don't waste time */ +#endif + { + /* + * Clear out attributes. + */ + memset((void *) context->present, 0, sizeof(BOOL) * + (unsigned) (context->current_tag->number_of_attributes)); + } + + string->size = 0; + context->current_attribute_number = INVALID; +#ifdef USE_PRETTYSRC + if (psrc_view) { + if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { + if (c != '<') { + PSRCSTART(abracket); + PUTC(c); + PSRCSTOP(abracket); + context->state = (c == '>') ? 
S_text : S_tagname_slash; + } else { + context->state = S_tag; + } + } else { + if (!WHITE(c)) + PUTC(c); + context->state = S_tag_gap; + } + } else +#endif + if (c == '>' || c == '<' || (c == '/' && context->slashedtag)) { + if (context->current_tag->name) + start_element(context); + context->state = (c == '>') ? S_text : + (c == '<') ? S_tag : S_tagname_slash; + } else { + context->state = S_tag_gap; + } + } + break; + + case S_exclamation: + if (context->lead_exclamation && c == '-') { + /* + * Set up for possible comment. - FM + */ + context->lead_exclamation = FALSE; + context->first_dash = TRUE; + HTChunkPutc(string, c); + break; + } + if (context->lead_exclamation && c == '[') { + /* + * Set up for possible marked section. - FM + */ + context->lead_exclamation = FALSE; + context->first_bracket = TRUE; + context->second_bracket = FALSE; + HTChunkPutc(string, c); + context->state = S_marked; + break; + } + if (context->first_dash && c == '-') { + /* + * Set up to handle comment. - FM + */ + context->lead_exclamation = FALSE; + context->first_dash = FALSE; + context->end_comment = FALSE; + HTChunkPutc(string, c); + context->state = S_comment; + break; + } + context->lead_exclamation = FALSE; + context->first_dash = FALSE; + if (c == '>') { + /* + * Try to handle identifier. - FM + */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_identifier(context); + string->size = 0; + context->state = S_text; + break; + } + if (WHITE(c)) { + if (string->size == 8 && + !strncasecomp(string->data, "!DOCTYPE", 8)) { + /* + * Set up for DOCTYPE declaration. - FM + */ + HTChunkPutc(string, c); + context->doctype_bracket = FALSE; + context->state = S_doctype; + break; + } + if (string->size == 7 && + !strncasecomp(string->data, "!ENTITY", 7)) { + /* + * Set up for ENTITY declaration. 
- FM + */ + HTChunkPutc(string, c); + context->first_dash = FALSE; + context->end_comment = TRUE; + context->state = S_sgmlent; + break; + } + if (string->size == 8 && + !strncasecomp(string->data, "!ELEMENT", 8)) { + /* + * Set up for ELEMENT declaration. - FM + */ + HTChunkPutc(string, c); + context->first_dash = FALSE; + context->end_comment = TRUE; + context->state = S_sgmlele; + break; + } + if (string->size == 8 && + !strncasecomp(string->data, "!ATTLIST", 8)) { + /* + * Set up for ATTLIST declaration. - FM + */ + HTChunkPutc(string, c); + context->first_dash = FALSE; + context->end_comment = TRUE; + context->state = S_sgmlatt; + break; + } + } + HTChunkPutc(string, c); + break; + + case S_comment: /* Expecting comment. - FM */ + if (historical_comments) { + /* + * Any '>' terminates. - FM + */ + if (c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(comm); + PUTC('<'); + PUTS_TR(string->data); + PUTC('>'); + PSRCSTOP(comm); + } else +#endif + handle_comment(context); + string->size = 0; + context->end_comment = FALSE; + context->first_dash = FALSE; + context->state = S_text; + break; + } + goto S_comment_put_c; + } + if (!context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = TRUE; + break; + } + if (context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = FALSE; + if (!context->end_comment) + context->end_comment = TRUE; + else if (!minimal_comments) + /* + * Validly treat '--' pairs as successive comments (for + * minimal, any "--WHITE>" terminates). - FM + */ + context->end_comment = FALSE; + break; + } + if (context->end_comment && c == '>') { + /* + * Terminate and handle the comment. 
- FM + */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(comm); + PUTC('<'); + PUTS_TR(string->data); + PUTC('>'); + PSRCSTOP(comm); + } else +#endif + handle_comment(context); + string->size = 0; + context->end_comment = FALSE; + context->first_dash = FALSE; + context->state = S_text; + break; + } + context->first_dash = FALSE; + if (context->end_comment && !isspace(UCH(c))) + context->end_comment = FALSE; + + S_comment_put_c: + if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (!IS_CJK_TTY && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + HTChunkPutUtf8Char(string, + (UCode_t) (0xf000 | saved_char_in)); + } else { + HTChunkPutUtf8Char(string, clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); + } else { + HTChunkPutc(string, c); + } + break; + + case S_doctype: /* Expecting DOCTYPE. - FM */ + if (context->doctype_bracket) { + HTChunkPutc(string, c); + if (c == ']') + context->doctype_bracket = FALSE; + break; + } + if (c == '[' && WHITE(string->data[string->size - 1])) { + HTChunkPutc(string, c); + context->doctype_bracket = TRUE; + break; + } + if (c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_doctype(context); + string->size = 0; + context->state = S_text; + break; + } + HTChunkPutc(string, c); + break; + + case S_marked: /* Expecting marked section. 
- FM */ + if (context->first_bracket && c == '[') { + HTChunkPutc(string, c); + context->first_bracket = FALSE; + context->second_bracket = TRUE; + break; + } + if (context->second_bracket && c == ']' && + string->data[string->size - 1] == ']') { + HTChunkPutc(string, c); + context->second_bracket = FALSE; + break; + } + if (!context->second_bracket && c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_marked(context); + string->size = 0; + context->state = S_text; + break; + } + HTChunkPutc(string, c); + break; + + case S_sgmlent: /* Expecting ENTITY. - FM */ + if (!context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = TRUE; + break; + } + if (context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = FALSE; + if (!context->end_comment) + context->end_comment = TRUE; + else + context->end_comment = FALSE; + break; + } + if (context->end_comment && c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_sgmlent(context); + string->size = 0; + context->end_comment = FALSE; + context->first_dash = FALSE; + context->state = S_text; + break; + } + context->first_dash = FALSE; + HTChunkPutc(string, c); + break; + + case S_sgmlele: /* Expecting ELEMENT. 
- FM */ + if (!context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = TRUE; + break; + } + if (context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = FALSE; + if (!context->end_comment) + context->end_comment = TRUE; + else + context->end_comment = FALSE; + break; + } + if (context->end_comment && c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_sgmlele(context); + string->size = 0; + context->end_comment = FALSE; + context->first_dash = FALSE; + context->state = S_text; + break; + } + context->first_dash = FALSE; + HTChunkPutc(string, c); + break; + + case S_sgmlatt: /* Expecting ATTLIST. - FM */ + if (!context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = TRUE; + break; + } + if (context->first_dash && c == '-') { + HTChunkPutc(string, c); + context->first_dash = FALSE; + if (!context->end_comment) + context->end_comment = TRUE; + else + context->end_comment = FALSE; + break; + } + if (context->end_comment && c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(sgmlspecial); + PUTC('<'); + PUTS(string->data); + PUTC('>'); + PSRCSTOP(sgmlspecial); + } else +#endif + handle_sgmlatt(context); + string->size = 0; + context->end_comment = FALSE; + context->first_dash = FALSE; + context->state = S_text; + break; + } + context->first_dash = FALSE; + HTChunkPutc(string, c); + break; + + case S_tag_gap: /* Expecting attribute or '>' */ + if (WHITE(c)) { + /* PUTC(c); - no, done as special case */ + break; /* Gap between attributes */ + } + if (c == '>') { /* End of tag */ +#ifdef USE_PRETTYSRC + if (!psrc_view) +#endif + if (context->current_tag->name) + start_element(context); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } +#endif + 
context->state = S_text; + break; + } + HTChunkPutc(string, c); + context->state = S_attr; /* Get attribute */ + break; + + /* accumulating value */ + case S_attr: + if (WHITE(c) || (c == '>') || (c == '=')) { /* End of word */ + if ((c == '>') + && (string->size == 1) + && (string->data[0] == '/')) { + if (context->extended_html + && ignore_when_empty(context->current_tag)) { + discard_empty(context); + } + } else { + HTChunkTerminate(string); + handle_attribute_name(context, string->data); + } +#ifdef USE_PRETTYSRC + if (!psrc_view) { +#endif + string->size = 0; + if (c == '>') { /* End of tag */ + if (context->current_tag->name) + start_element(context); + context->state = S_text; + break; + } +#ifdef USE_PRETTYSRC + } else { + PUTC(' '); + if (context->current_attribute_number == INVALID) + PSRCSTART(badattr); + else + PSRCSTART(attrib); + if (attrname_transform != 1) { + if (attrname_transform == 0) + LYLowerCase(string->data); + else + LYUpperCase(string->data); + } + PUTS(string->data); + if (c == '=' || WHITE(c)) + PUTC(c); + if (c == '=' || c == '>') { + if (context->current_attribute_number == INVALID) { + PSRCSTOP(badattr); + } else { + PSRCSTOP(attrib); + } + } + if (c == '>') { + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + context->state = S_text; + break; + } + string->size = 0; + } +#endif + context->state = (c == '=' ? 
S_equals : S_attr_gap); + } else { + HTChunkPutc(string, c); + } + break; + + case S_attr_gap: /* Expecting attribute or '=' or '>' */ + if (WHITE(c)) { + PRETTYSRC_PUTC(c); + break; /* Gap after attribute */ + } + if (c == '>') { /* End of tag */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + if (context->current_attribute_number == INVALID) { + PSRCSTOP(badattr); + } else { + PSRCSTOP(attrib); + } + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } else +#endif + if (context->current_tag->name) + start_element(context); + context->state = S_text; + break; + } else if (c == '=') { +#ifdef USE_PRETTYSRC + if (psrc_view) { + PUTC('='); + if (context->current_attribute_number == INVALID) { + PSRCSTOP(badattr); + } else { + PSRCSTOP(attrib); + } + } +#endif + context->state = S_equals; + break; + } + HTChunkPutc(string, c); + context->state = S_attr; /* Get next attribute */ + break; + + case S_equals: /* After attr = */ + if (WHITE(c)) { + PRETTYSRC_PUTC(c); + break; /* Before attribute value */ + } + if (c == '>') { /* End of tag */ + CTRACE((tfp, "SGML: found = but no value\n")); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } else +#endif + if (context->current_tag->name) + start_element(context); + context->state = S_text; + break; + + } else if (c == '\'') { +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(attrval); + PUTC(c); + } +#endif + context->state = S_squoted; + break; + + } else if (c == '"') { +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(attrval); + PUTC(c); + } +#endif + context->state = S_dquoted; + break; + } +#ifdef USE_PRETTYSRC + if (psrc_view) + PSRCSTART(attrval); +#endif + context->state = S_value; + /* no break! fall through to S_value and process current `c` */ + + case S_value: + if (WHITE(c) || (c == '>')) { /* End of word */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (!end_if_prettysrc(context, string, 0)) +#endif + { +#ifdef CJK_EX /* Quick hack. 
- JH7AYN */ + if (IS_CJK_TTY) { + if (string->data[0] == '$') { + if (string->data[1] == 'B' || string->data[1] == '@') { + char *jis_buf = 0; + + HTSprintf0(&jis_buf, "\033%s", string->data); + TO_EUC((const unsigned char *) jis_buf, + (unsigned char *) string->data); + FREE(jis_buf); + } + } + } +#endif + handle_attribute_value(context, string->data); + } + string->size = 0; + if (c == '>') { /* End of tag */ +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } else +#endif + if (context->current_tag->name) + start_element(context); + context->state = S_text; + break; + } else + context->state = S_tag_gap; + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (!IS_CJK_TTY && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + HTChunkPutUtf8Char(string, + (UCode_t) (0xf000 | saved_char_in)); + } else { + HTChunkPutUtf8Char(string, clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); + } else { + HTChunkPutc(string, c); + } + break; + + case S_squoted: /* Quoted attribute value */ + if (c == '\'') { /* End of attribute value */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (!end_if_prettysrc(context, string, '\'')) +#endif + handle_attribute_value(context, string->data); + string->size = 0; + context->state = S_tag_gap; + } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1213 */ + /* + * Setting up for possible single quotes in CJK escape sequences. 
+ * - Takuya ASADA (asada@three-a.co.jp) + */ + context->state = S_esc_sq; + HTChunkPutc(string, c); + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (!IS_CJK_TTY && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + HTChunkPutUtf8Char(string, + (UCode_t) (0xf000 | saved_char_in)); + } else { + HTChunkPutUtf8Char(string, clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); + } else { + HTChunkPutc(string, c); + } + break; + + case S_dquoted: /* Quoted attribute value */ + if (c == '"' || /* Valid end of attribute value */ + (soft_dquotes && /* If emulating old Netscape bug, treat '>' */ + c == '>')) { /* as a co-terminator of dquoted and tag */ + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (!end_if_prettysrc(context, string, (char) c)) +#endif + handle_attribute_value(context, string->data); + string->size = 0; + context->state = S_tag_gap; + if (c == '>') /* We emulated the Netscape bug, so we go */ + goto top1; /* back and treat it as the tag terminator */ + } else if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1230 */ + /* + * Setting up for possible double quotes in CJK escape sequences. 
+ * - Takuya ASADA (asada@three-a.co.jp) + */ + context->state = S_esc_dq; + HTChunkPutc(string, c); + } else if (context->T.decode_utf8 && + *context->utf_buf) { + HTChunkPuts(string, context->utf_buf); + context->utf_buf_p = context->utf_buf; + *(context->utf_buf_p) = '\0'; + } else if (!IS_CJK_TTY && + (context->T.output_utf8 || + context->T.trans_from_uni)) { + if (clong == 0xfffd && saved_char_in && + HTPassEightBitRaw && + saved_char_in >= + LYlowest_eightbit[context->outUCLYhndl]) { + HTChunkPutUtf8Char(string, + (UCode_t) (0xf000 | saved_char_in)); + } else { + HTChunkPutUtf8Char(string, clong); + } + } else if (saved_char_in && context->T.use_raw_char_in) { + HTChunkPutc(string, saved_char_in); + } else { + HTChunkPutc(string, c); + } + break; + + case S_end: /* </ */ + if (TOASCII(unsign_c) < 127 && (string->size ? /* S/390 -- gil -- 1247 */ + IsNmChar(c) : IsNmStart(c))) { + HTChunkPutc(string, c); + } else { /* End of end tag name */ + HTTag *t = 0; + +#ifdef USE_PRETTYSRC + BOOL psrc_tagname_processed = FALSE; +#endif + + HTChunkTerminate(string); + if (!*string->data) { /* Empty end tag */ + if (context->element_stack) + t = context->element_stack->tag; + } else { + t = SGMLFindTag(dtd, string->data); + } + if (!t || t == context->unknown_tag) { + CTRACE((tfp, "Unknown end tag </%s>\n", string->data)); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PSRCSTART(abracket); + PUTS("</"); + PSRCSTOP(abracket); + PSRCSTART(badtag); + transform_tag(context, string); + PUTS(string->data); + if (c != '>') { + PUTC(c); + } else { + PSRCSTOP(badtag); + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } + psrc_tagname_processed = TRUE; + } + } else if (psrc_view) { +#endif + } else { + BOOL tag_OK = (BOOL) (c == '>' || WHITE(c)); + HTMLElement e = TAGNUM_OF_TAGP(t); + int branch = 2; /* it can be 0,1,2 */ + + context->current_tag = t; + if (HAS_ALT_TAGNUM(TAGNUM_OF_TAGP(t)) && + context->element_stack && + ALT_TAGP(t) == context->element_stack->tag) + 
context->element_stack->tag = NORMAL_TAGP(context->element_stack->tag); + + if (tag_OK && Old_DTD) { + switch (e) { + case HTML_DD: + case HTML_DT: + case HTML_LI: + case HTML_LH: + case HTML_TD: + case HTML_TH: + case HTML_TR: + case HTML_THEAD: + case HTML_TFOOT: + case HTML_TBODY: + case HTML_COLGROUP: + branch = 0; + break; + + case HTML_A: + case HTML_B: + case HTML_BLINK: + case HTML_CITE: + case HTML_EM: + case HTML_FONT: + case HTML_FORM: + case HTML_I: + case HTML_P: + case HTML_STRONG: + case HTML_TT: + case HTML_U: + branch = 1; + break; + default: + break; + } + } + + /* + * Just handle ALL end tags normally :-) - kw + */ + if (!Old_DTD) { + end_element(context, context->current_tag); + } else if (tag_OK && (branch == 0)) { + /* + * Don't treat these end tags as invalid, nor act on them. + * - FM + */ + CTRACE((tfp, "SGML: `</%s%c' found! Ignoring it.\n", + string->data, c)); + string->size = 0; + context->current_attribute_number = INVALID; + if (c != '>') { + context->state = S_junk_tag; + } else { + context->current_tag = NULL; + context->state = S_text; + } + break; + } else if (tag_OK && (branch == 1)) { + /* + * Handle end tags for container elements declared as + * SGML_EMPTY to prevent "expected tag substitution" but + * still processed via HTML_end_element() in HTML.c with + * checks there to avoid throwing the HTML.c stack out of + * whack (Ugh, what a hack! 8-). - FM + */ + if (context->inSELECT) { + /* + * We are in a SELECT block. - FM + */ + if (strcasecomp(string->data, "FORM")) { + /* + * It is not at FORM end tag, so ignore it. - FM + */ + CTRACE((tfp, + "SGML: ***Ignoring end tag </%s> in SELECT block.\n", + string->data)); + } else { + /* + * End the SELECT block and then handle the FORM + * end tag. 
- FM + */ + CTRACE((tfp, + "SGML: ***Faking SELECT end tag before </%s> end tag.\n", + string->data)); + end_element(context, + SGMLFindTag(context->dtd, "SELECT")); + CTRACE((tfp, "SGML: End </%s>\n", string->data)); + +#ifdef USE_PRETTYSRC + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ +#endif + (*context->actions->end_element) + (context->target, + (int) TAGNUM_OF_TAGP(context->current_tag), + &context->include); + } + } else if (!strcasecomp(string->data, "P")) { + /* + * Treat a P end tag like a P start tag (Ugh, what a + * hack! 8-). - FM + */ + CTRACE((tfp, + "SGML: `</%s%c' found! Treating as '<%s%c'.\n", + string->data, c, string->data, c)); + { + int i; + + for (i = 0; + i < context->current_tag->number_of_attributes; + i++) { + context->present[i] = NO; + } + } + if (context->current_tag->name) + start_element(context); + } else { + CTRACE((tfp, "SGML: End </%s>\n", string->data)); + +#ifdef USE_PRETTYSRC + if (!psrc_view) /* Don't actually call if viewing psrc - kw */ +#endif + (*context->actions->end_element) + (context->target, + (int) TAGNUM_OF_TAGP(context->current_tag), + &context->include); + } + string->size = 0; + context->current_attribute_number = INVALID; + if (c != '>') { + context->state = S_junk_tag; + } else { + context->current_tag = NULL; + context->state = S_text; + } + break; + } else { + /* + * Handle all other end tags normally. 
- FM + */ + end_element(context, context->current_tag); + } + } + +#ifdef USE_PRETTYSRC + if (psrc_view && !psrc_tagname_processed) { + PSRCSTART(abracket); + PUTS("</"); + PSRCSTOP(abracket); + PSRCSTART(tag); + if (tagname_transform != 1) { + if (tagname_transform == 0) + LYLowerCase(string->data); + else + LYUpperCase(string->data); + } + PUTS(string->data); + PSRCSTOP(tag); + if (c != '>') { + PSRCSTART(badtag); + PUTC(c); + } else { + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } + } +#endif + + string->size = 0; + context->current_attribute_number = INVALID; + if (c != '>') { + if (!WHITE(c)) + CTRACE((tfp, "SGML: `</%s%c' found!\n", string->data, c)); + context->state = S_junk_tag; + } else { + context->current_tag = NULL; + context->state = S_text; + } + } + break; + + case S_esc: /* Expecting '$'or '(' following CJK ESC. */ + if (c == '$') { + context->state = S_dollar; + } else if (c == '(') { + context->state = S_paren; + } else { + context->state = S_text; + } + PUTC(c); + break; + + case S_dollar: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + if (c == '@' || c == 'B' || c == 'A') { + context->state = S_nonascii_text; + } else if (c == '(') { + context->state = S_dollar_paren; + } + PUTC(c); + break; + + case S_dollar_paren: /* Expecting 'C' after CJK "ESC$(". */ + if (c == 'C') { + context->state = S_nonascii_text; + } else { + context->state = S_text; + } + PUTC(c); + break; + + case S_paren: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ + if (c == 'B' || c == 'J' || c == 'T') { + context->state = S_text; + } else if (c == 'I') { + context->state = S_nonascii_text; + } else { + context->state = S_text; + } + PUTC(c); + break; + + case S_nonascii_text: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1264 */ + context->state = S_esc; + } + PUTC(c); + if (c < 32) + context->state = S_text; + break; + + case S_esc_sq: /* Expecting '$'or '(' following CJK ESC. 
*/ + if (c == '$') { + context->state = S_dollar_sq; + } else if (c == '(') { + context->state = S_paren_sq; + } else { + context->state = S_squoted; + } + HTChunkPutc(string, c); + break; + + case S_dollar_sq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + if (c == '@' || c == 'B' || c == 'A') { + context->state = S_nonascii_text_sq; + } else if (c == '(') { + context->state = S_dollar_paren_sq; + } + HTChunkPutc(string, c); + break; + + case S_dollar_paren_sq: /* Expecting 'C' after CJK "ESC$(". */ + if (c == 'C') { + context->state = S_nonascii_text_sq; + } else { + context->state = S_squoted; + } + HTChunkPutc(string, c); + break; + + case S_paren_sq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". */ + if (c == 'B' || c == 'J' || c == 'T') { + context->state = S_squoted; + } else if (c == 'I') { + context->state = S_nonascii_text_sq; + } else { + context->state = S_squoted; + } + HTChunkPutc(string, c); + break; + + case S_nonascii_text_sq: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1281 */ + context->state = S_esc_sq; + } + HTChunkPutc(string, c); + break; + + case S_esc_dq: /* Expecting '$'or '(' following CJK ESC. */ + if (c == '$') { + context->state = S_dollar_dq; + } else if (c == '(') { + context->state = S_paren_dq; + } else { + context->state = S_dquoted; + } + HTChunkPutc(string, c); + break; + + case S_dollar_dq: /* Expecting '@', 'B', 'A' or '(' after CJK "ESC$". */ + if (c == '@' || c == 'B' || c == 'A') { + context->state = S_nonascii_text_dq; + } else if (c == '(') { + context->state = S_dollar_paren_dq; + } + HTChunkPutc(string, c); + break; + + case S_dollar_paren_dq: /* Expecting 'C' after CJK "ESC$(". */ + if (c == 'C') { + context->state = S_nonascii_text_dq; + } else { + context->state = S_dquoted; + } + HTChunkPutc(string, c); + break; + + case S_paren_dq: /* Expecting 'B', 'J', 'T' or 'I' after CJK "ESC(". 
*/ + if (c == 'B' || c == 'J' || c == 'T') { + context->state = S_dquoted; + } else if (c == 'I') { + context->state = S_nonascii_text_dq; + } else { + context->state = S_dquoted; + } + HTChunkPutc(string, c); + break; + + case S_nonascii_text_dq: /* Expecting CJK ESC after non-ASCII text. */ + if (TOASCII(c) == '\033') { /* S/390 -- gil -- 1298 */ + context->state = S_esc_dq; + } + HTChunkPutc(string, c); + break; + + case S_junk_tag: + case S_pi: + if (c == '>') { + HTChunkTerminate(string); +#ifdef USE_PRETTYSRC + if (psrc_view) { + if (context->state == S_junk_tag) { + PSRCSTOP(badtag); + } + PSRCSTART(abracket); + PUTC('>'); + PSRCSTOP(abracket); + } +#endif + if (context->state == S_pi) + handle_processing_instruction(context); + string->size = 0; + context->current_tag = NULL; + context->state = S_text; + } else { + HTChunkPutc(string, c); +#ifdef USE_PRETTYSRC + if (psrc_view) { + PUTC(c); + } +#endif + } + + } /* switch on context->state */ + CTRACE2(TRACE_SGML, (tfp, "SGML after %s|%.*s|%c|\n", + state_name(context->state), + string->size, + NonNull(string->data), + UCH(c))); + + after_switch: + /* + * Check whether an external function has added anything to the include + * buffer. If so, move the new stuff to the beginning of active_include. + * - kw + */ + if (context->include != NULL) { + if (context->include[0] == '\0') { + FREE(context->include); + } else { + if (context->active_include && + context->active_include[context->include_index] != '\0') + StrAllocCat(context->include, + context->active_include + context->include_index); + FREE(context->active_include); + context->active_include = context->include; + context->include_index = 0; + context->include = NULL; + } + } + + /* + * Check whether we've added anything to the recover buffer. 
- FM + */ + if (context->recover != NULL) { + if (context->recover[context->recover_index] == '\0') { + FREE(context->recover); + context->recover_index = 0; + } else { + c = UCH(context->recover[context->recover_index]); + context->recover_index++; + goto top; + } + } + + /* + * Check whether an external function had added anything to the include + * buffer; it should now be in active_include. - FM / kw + */ + if (context->active_include != NULL) { + if (context->active_include[context->include_index] == '\0') { + FREE(context->active_include); + context->include_index = 0; + } else { + if (context->current_tag_charset == UTF8_handle || + context->T.trans_from_uni) { + /* + * If it looks like we would have fed UTF-8 to the next + * processing stage, assume that whatever we were fed back is + * in UTF-8 form, too. This won't be always true for all uses + * of the include buffer, but it's a start. - kw + */ + char *puni = context->active_include + context->include_index; + + c = UCH(*puni); + clong = UCGetUniFromUtf8String(&puni); + if (clong < 256 && clong >= 0) { + c = UCH((clong & 0xff)); + } + saved_char_in = '\0'; + context->include_index = (int) (puni + - context->active_include + + 1); + goto top1; + } else { + /* + * Otherwise assume no UTF-8 - do charset-naive processing and + * hope for the best. - kw + */ + c = UCH(context->active_include[context->include_index]); + context->include_index++; + goto top; + } + } + } + + /* + * Check whether an external function has added anything to the csi buffer. 
+ * - FM + */ + if (context->csi != NULL) { + if (context->csi[context->csi_index] == '\0') { + FREE(context->csi); + context->csi_index = 0; + } else { + c = UCH(context->csi[context->csi_index]); + context->csi_index++; + goto top; + } + } +} /* SGML_character */ + +static void InferUtfFromBom(HTStream *context, int chndl) +{ + HTAnchor_setUCInfoStage(context->node_anchor, chndl, + UCT_STAGE_PARSER, + UCT_SETBY_PARSER); + change_chartrans_handling(context); +} + +/* + * Avoid rewrite of SGML_character() to handle hypothetical case of UTF-16 + * webpages, by pretending that the data is UTF-8. + */ +static void SGML_widechar(HTStream *context, int ch) +{ + if (!UCPutUtf8_charstring(context, SGML_character, (UCode_t) ch)) { + SGML_character(context, ch); + } +} + +static void SGML_write(HTStream *context, const char *str, int l) +{ + const char *p; + const char *e = str + l; + + if (sgml_offset == 0) { + if (l > 3 + && !MemCmp(str, "\357\273\277", 3)) { + CTRACE((tfp, "SGML_write found UTF-8 BOM\n")); + InferUtfFromBom(context, UTF8_handle); + str += 3; + } else if (l > 2) { + if (!MemCmp(str, "\377\376", 2)) { + CTRACE((tfp, "SGML_write found UCS-2 LE BOM\n")); + InferUtfFromBom(context, UTF8_handle); + str += 2; + context->T.ucs_mode = -1; + } else if (!MemCmp(str, "\376\377", 2)) { + CTRACE((tfp, "SGML_write found UCS-2 BE BOM\n")); + InferUtfFromBom(context, UTF8_handle); + str += 2; + context->T.ucs_mode = 1; + } + } + } + switch (context->T.ucs_mode) { + case -1: + for (p = str; p < e; p += 2) + SGML_widechar(context, (UCH(p[1]) << 8) | UCH(p[0])); + break; + case 1: + for (p = str; p < e; p += 2) + SGML_widechar(context, (UCH(p[0]) << 8) | UCH(p[1])); + break; + default: + for (p = str; p < e; p++) + SGML_character(context, *p); + break; + } +} + +static void SGML_string(HTStream *context, const char *str) +{ + SGML_write(context, str, (int) strlen(str)); +} + +/*_______________________________________________________________________ +*/ + +/* Structured 
Object Class + * ----------------------- + */ +const HTStreamClass SGMLParser = +{ + "SGMLParser", + SGML_free, + SGML_abort, + SGML_character, + SGML_string, + SGML_write, +}; + +/* Create SGML Engine + * ------------------ + * + * On entry, + * dtd represents the DTD, along with + * actions is the sink for the data as a set of routines. + * + */ + +HTStream *SGML_new(const SGML_dtd * dtd, + HTParentAnchor *anchor, + HTStructured * target) +{ + HTStream *context = typecalloc(struct _HTStream); + + if (!context) + outofmem(__FILE__, "SGML_begin"); + + assert(context != NULL); + + context->isa = &SGMLParser; + context->string = HTChunkCreate(128); /* Grow by this much */ + context->dtd = dtd; + context->target = target; + context->actions = (const HTStructuredClass *) (((HTStream *) target)->isa); + /* Ugh: no OO */ + context->unknown_tag = &HTTag_unrecognized; + context->current_tag = context->slashedtag = NULL; + context->state = S_text; +#ifdef CALLERDATA + context->callerData = (void *) callerData; +#endif /* CALLERDATA */ + + context->node_anchor = anchor; /* Could be NULL? 
*/ + context->utf_buf_p = context->utf_buf; + UCTransParams_clear(&context->T); + context->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, + UCT_STAGE_PARSER); + if (context->inUCLYhndl < 0) { + HTAnchor_copyUCInfoStage(anchor, + UCT_STAGE_PARSER, + UCT_STAGE_MIME, + -1); + context->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, + UCT_STAGE_PARSER); + } +#ifdef CAN_SWITCH_DISPLAY_CHARSET /* Allow a switch to a more suitable display charset */ + else if (anchor->UCStages + && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl >= 0 + && anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl != current_char_set) { + int o = anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl; + + anchor->UCStages->s[UCT_STAGE_PARSER].LYhndl = -1; /* Force reset */ + HTAnchor_resetUCInfoStage(anchor, o, UCT_STAGE_PARSER, + /* Preserve change this: */ + anchor->UCStages->s[UCT_STAGE_PARSER].lock); + } +#endif + + context->inUCI = HTAnchor_getUCInfoStage(anchor, + UCT_STAGE_PARSER); + set_chartrans_handling(context, anchor, -1); + + context->recover = NULL; + context->recover_index = 0; + context->include = NULL; + context->active_include = NULL; + context->include_index = 0; + context->url = NULL; + context->csi = NULL; + context->csi_index = 0; + +#ifdef USE_PRETTYSRC + if (psrc_view) { + psrc_view = FALSE; + mark_htext_as_source = TRUE; + SGML_string(context, + "<HTML><HEAD><TITLE>source</TITLE></HEAD><BODY><PRE>"); + psrc_view = TRUE; + psrc_convert_string = FALSE; + sgml_in_psrc_was_initialized = TRUE; + } +#endif + + sgml_offset = 0; + return context; +} + +/* + * Return the offset within the document where we're parsing. This is used + * to help identify anchors which shift around while reparsing. 
+ */ +int SGML_offset(void) +{ + int result = sgml_offset; + +#ifdef USE_PRETTYSRC + result += psrc_view; +#endif + return result; +} + +/* Asian character conversion functions + * ==================================== + * + * Added 24-Mar-96 by FM, based on: + * + //////////////////////////////////////////////////////////////////////// +Copyright (c) 1993 Electrotechnical Laboratory (ETL) + +Permission to use, copy, modify, and distribute this material +for any purpose and without fee is hereby granted, provided +that the above copyright notice and this permission notice +appear in all copies, and that the name of ETL not be +used in advertising or publicity pertaining to this +material without the specific, prior written permission +of an authorized representative of ETL. +ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY +OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS", +WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. +///////////////////////////////////////////////////////////////////////// +Content-Type: program/C; charset=US-ASCII +Program: SJIS.c +Author: Yutaka Sato <ysato@etl.go.jp> +Description: +History: + 930923 extracted from codeconv.c of cosmos +/////////////////////////////////////////////////////////////////////// +*/ + +static int TREAT_SJIS = 1; + +void JISx0201TO0208_EUC(unsigned IHI, + unsigned ILO, + unsigned char *OHI, + unsigned char *OLO) +{ + static const char *table[] = + { + "\241\243", /* A1,A3 */ + "\241\326", /* A1,D6 */ + "\241\327", /* A1,D7 */ + "\241\242", /* A1,A2 */ + "\241\246", /* A1,A6 */ + "\245\362", /* A5,F2 */ + "\245\241", /* A5,A1 */ + "\245\243", /* A5,A3 */ + "\245\245", /* A5,A5 */ + "\245\247", /* A5,A7 */ + "\245\251", /* A5,A9 */ + "\245\343", /* A5,E3 */ + "\245\345", /* A5,E5 */ + "\245\347", /* A5,E7 */ + "\245\303", /* A5,C3 */ + "\241\274", /* A1,BC */ + "\245\242", /* A5,A2 */ + "\245\244", /* A5,A4 */ + "\245\246", /* A5,A6 */ + "\245\250", /* A5,A8 */ + "\245\252", /* A5,AA */ + "\245\253", 
/* A5,AB */ + "\245\255", /* A5,AD */ + "\245\257", /* A5,AF */ + "\245\261", /* A5,B1 */ + "\245\263", /* A5,B3 */ + "\245\265", /* A5,B5 */ + "\245\267", /* A5,B7 */ + "\245\271", /* A5,B9 */ + "\245\273", /* A5,BB */ + "\245\275", /* A5,BD */ + "\245\277", /* A5,BF */ + "\245\301", /* A5,C1 */ + "\245\304", /* A5,C4 */ + "\245\306", /* A5,C6 */ + "\245\310", /* A5,C8 */ + "\245\312", /* A5,CA */ + "\245\313", /* A5,CB */ + "\245\314", /* A5,CC */ + "\245\315", /* A5,CD */ + "\245\316", /* A5,CE */ + "\245\317", /* A5,CF */ + "\245\322", /* A5,D2 */ + "\245\325", /* A5,D5 */ + "\245\330", /* A5,D8 */ + "\245\333", /* A5,DB */ + "\245\336", /* A5,DE */ + "\245\337", /* A5,DF */ + "\245\340", /* A5,E0 */ + "\245\341", /* A5,E1 */ + "\245\342", /* A5,E2 */ + "\245\344", /* A5,E4 */ + "\245\346", /* A5,E6 */ + "\245\350", /* A5,E8 */ + "\245\351", /* A5,E9 */ + "\245\352", /* A5,EA */ + "\245\353", /* A5,EB */ + "\245\354", /* A5,EC */ + "\245\355", /* A5,ED */ + "\245\357", /* A5,EF */ + "\245\363", /* A5,F3 */ + "\241\253", /* A1,AB */ + "\241\254" /* A1,AC */ + }; + + if ((IHI == 0x8E) && (ILO >= 0xA1) && (ILO <= 0xDF)) { + *OHI = UCH(table[ILO - 0xA1][0]); + *OLO = UCH(table[ILO - 0xA1][1]); + } else { + *OHI = UCH(IHI); + *OLO = UCH(ILO); + } +} + +static int IS_SJIS_STR(const unsigned char *str) +{ + const unsigned char *s; + unsigned char ch; + int is_sjis = 0; + + s = str; + while ((ch = *s++) != '\0') { + if (ch & 0x80) + if (IS_SJIS(ch, *s, is_sjis)) + return 1; + } + return 0; +} + +unsigned char *SJIS_TO_JIS1(unsigned HI, + unsigned LO, + unsigned char *JCODE) +{ + HI = UCH(HI - (unsigned) UCH((HI <= 0x9F) ? 
0x71 : 0xB1)); + HI = UCH((HI << 1) + 1); + if (0x7F < LO) + LO--; + if (0x9E <= LO) { + LO = UCH(LO - UCH(0x7D)); + HI++; + } else { + LO = UCH(LO - UCH(0x1F)); + } + JCODE[0] = UCH(HI); + JCODE[1] = UCH(LO); + return JCODE; +} + +unsigned char *JIS_TO_SJIS1(unsigned HI, + unsigned LO, + unsigned char *SJCODE) +{ + if (HI & 1) + LO = UCH(LO + UCH(0x1F)); + else + LO = UCH(LO + UCH(0x7D)); + if (0x7F <= LO) + LO++; + + HI = UCH(((HI - 0x21) >> 1) + 0x81); + if (0x9F < HI) + HI = UCH(HI + UCH(0x40)); + SJCODE[0] = UCH(HI); + SJCODE[1] = UCH(LO); + return SJCODE; +} + +unsigned char *EUC_TO_SJIS1(unsigned HI, + unsigned LO, + unsigned char *SJCODE) +{ + if (HI == 0x8E) { + unsigned char HI_data[2]; + unsigned char LO_data[2]; + + HI_data[0] = UCH(HI); + LO_data[0] = UCH(LO); + JISx0201TO0208_EUC(HI, LO, HI_data, LO_data); + } + JIS_TO_SJIS1(UCH(HI & 0x7F), UCH(LO & 0x7F), SJCODE); + return SJCODE; +} + +void JISx0201TO0208_SJIS(unsigned I, + unsigned char *OHI, + unsigned char *OLO) +{ + unsigned char SJCODE[2]; + + JISx0201TO0208_EUC(0x8E, I, OHI, OLO); + JIS_TO_SJIS1(UCH(*OHI & 0x7F), UCH(*OLO & 0x7F), SJCODE); + *OHI = SJCODE[0]; + *OLO = SJCODE[1]; +} + +unsigned char *SJIS_TO_EUC1(unsigned HI, + unsigned LO, + unsigned char *data) +{ + SJIS_TO_JIS1(HI, LO, data); + data[0] |= 0x80; + data[1] |= 0x80; + return data; +} + +unsigned char *SJIS_TO_EUC(unsigned char *src, + unsigned char *dst) +{ + unsigned char hi, lo, *sp, *dp; + int in_sjis = 0; + + in_sjis = IS_SJIS_STR(src); + for (sp = src, dp = dst; (hi = sp[0]) != '\0';) { + lo = sp[1]; + if (TREAT_SJIS && IS_SJIS(hi, lo, in_sjis)) { + SJIS_TO_JIS1(hi, lo, dp); + dp[0] |= 0x80; + dp[1] |= 0x80; + dp += 2; + sp += 2; + } else + *dp++ = *sp++; + } + *dp = 0; + return dst; +} + +unsigned char *EUC_TO_SJIS(unsigned char *src, + unsigned char *dst) +{ + unsigned char *sp, *dp; + + for (sp = src, dp = dst; *sp;) { + if (*sp & 0x80) { + if (sp[1] && (sp[1] & 0x80)) { + JIS_TO_SJIS1(UCH(sp[0] & 0x7F), UCH(sp[1] & 
0x7F), dp); + dp += 2; + sp += 2; + } else { + sp++; + } + } else { + *dp++ = *sp++; + } + } + *dp = 0; + return dst; +} + +#define Strcpy(a,b) (strcpy((char*)a,(const char*)b),&a[strlen((const char*)a)]) + +unsigned char *EUC_TO_JIS(unsigned char *src, + unsigned char *dst, + const char *toK, + const char *toA) +{ + unsigned char kana_mode = 0; + unsigned char cch; + unsigned char *sp = src; + unsigned char *dp = dst; + int is_JIS = 0; + + while ((cch = *sp++) != '\0') { + if (cch & 0x80) { + if (!IS_EUC(cch, *sp)) { + if (cch == 0xA0 && is_JIS) /* ignore NBSP */ + continue; + is_JIS++; + *dp++ = cch; + continue; + } + if (!kana_mode) { + kana_mode = UCH(~kana_mode); + dp = Strcpy(dp, toK); + } + if (*sp & 0x80) { + *dp++ = UCH(cch & ~0x80); + *dp++ = UCH(*sp++ & ~0x80); + } + } else { + if (kana_mode) { + kana_mode = UCH(~kana_mode); + dp = Strcpy(dp, toA); + } + *dp++ = cch; + } + } + if (kana_mode) + dp = Strcpy(dp, toA); + + if (dp) + *dp = 0; + return dst; +} + +#define IS_JIS7(c1,c2) (0x20<(c1)&&(c1)<0x7F && 0x20<(c2)&&(c2)<0x7F) +#define SO ('N'-0x40) +#define SI ('O'-0x40) + +static int repair_JIS = 0; + +static const unsigned char *repairJIStoEUC(const unsigned char *src, + unsigned char **dstp) +{ + const unsigned char *s; + unsigned char *d, ch1, ch2; + + d = *dstp; + s = src; + while ((ch1 = s[0]) && (ch2 = s[1])) { + s += 2; + if (ch1 == '(') + if (ch2 == 'B' || ch2 == 'J') { + *dstp = d; + return s; + } + if (!IS_JIS7(ch1, ch2)) + return 0; + + *d++ = UCH(0x80 | ch1); + *d++ = UCH(0x80 | ch2); + } + return 0; +} + +unsigned char *TO_EUC(const unsigned char *jis, + unsigned char *euc) +{ + const unsigned char *s; + unsigned char c, jis_stat; + unsigned char *d; + int to1B, to2B; + int in_sjis = 0; + static int nje; + int n8bits; + int is_JIS; + + nje++; + n8bits = 0; + s = jis; + d = euc; + jis_stat = 0; + to2B = TO_2BCODE; + to1B = TO_1BCODE; + in_sjis = IS_SJIS_STR(jis); + is_JIS = 0; + + while ((c = *s++) != '\0') { + if (c == 0x80) + continue; /* 
ignore it */ + if (c == 0xA0 && is_JIS) + continue; /* ignore Non-breaking space */ + + if (c == to2B && jis_stat == 0 && repair_JIS) { + if (*s == 'B' || *s == '@') { + const unsigned char *ts; + + if ((ts = repairJIStoEUC(s + 1, &d)) != NULL) { + s = ts; + continue; + } + } + } + if (c == CH_ESC) { + if (*s == to2B) { + if ((s[1] == 'B') || (s[1] == '@')) { + jis_stat = 0x80; + s += 2; + is_JIS++; + continue; + } + jis_stat = 0; + } else if (*s == to1B) { + jis_stat = 0; + if ((s[1] == 'B') || (s[1] == 'J') || (s[1] == 'H')) { + s += 2; + continue; + } + } else if (*s == ',') { /* MULE */ + jis_stat = 0; + } + } + if (c & 0x80) + n8bits++; + + if (IS_SJIS(c, *s, in_sjis)) { + SJIS_TO_EUC1(c, *s, d); + d += 2; + s++; + is_JIS++; + } else if (jis_stat) { + if (c <= 0x20 || 0x7F <= c) { + *d++ = c; + if (c == '\n') + jis_stat = 0; + } else { + if (IS_JIS7(c, *s)) { + *d++ = jis_stat | c; + *d++ = jis_stat | *s++; + } else + *d++ = c; + } + } else { + if (n8bits == 0 && (c == SI || c == SO)) { + } else { + *d++ = c; + } + } + } + *d = 0; + return euc; +} + +#define non94(ch) ((ch) <= 0x20 || (ch) == 0x7F) + +static int is_EUC_JP(unsigned char *euc) +{ + unsigned char *cp; + int ch1, ch2; + + for (cp = euc; (ch1 = *cp) != '\0'; cp++) { + if (ch1 & 0x80) { + ch2 = cp[1] & 0xFF; + if ((ch2 & 0x80) == 0) { + /* sv1log("NOT_EUC1[%x][%x]\n",ch1,ch2); */ + return 0; + } + if (non94(ch1 & 0x7F) || non94(ch2 & 0x7F)) { + /* sv1log("NOT_EUC2[%x][%x]\n",ch1,ch2); */ + return 0; + } + cp++; + } + } + return 1; +} + +void TO_SJIS(const unsigned char *arg, + unsigned char *sjis) +{ + unsigned char *euc; + + euc = typeMallocn(unsigned char, strlen((const char *) arg) + 1); + +#ifdef CJK_EX + if (!euc) + outofmem(__FILE__, "TO_SJIS"); +#endif + TO_EUC(arg, euc); + if (is_EUC_JP(euc)) + EUC_TO_SJIS(euc, sjis); + else + strcpy((char *) sjis, (const char *) arg); + free(euc); +} + +void TO_JIS(const unsigned char *arg, + unsigned char *jis) +{ + unsigned char *euc; + + if (arg[0] == 0) 
{ + jis[0] = 0; + return; + } + euc = typeMallocn(unsigned char, strlen((const char *)arg) + 1); +#ifdef CJK_EX + if (!euc) + outofmem(__FILE__, "TO_JIS"); +#endif + TO_EUC(arg, euc); + is_EUC_JP(euc); + EUC_TO_JIS(euc, jis, TO_KANJI, TO_ASCII); + + free(euc); +} diff --git a/WWW/Library/Implementation/SGML.h b/WWW/Library/Implementation/SGML.h new file mode 100644 index 00000000..9fccdda2 --- /dev/null +++ b/WWW/Library/Implementation/SGML.h @@ -0,0 +1,286 @@ +/* + * $LynxId: SGML.h,v 1.46 2012/02/10 18:32:26 tom Exp $ + * SGML parse and stream definition for libwww + * SGML AND STRUCTURED STREAMS + * + * The SGML parser is a state machine. It is called for every character + * of the input stream. The DTD data structure contains pointers + * to functions which are called to implement the actual effect of the + * text read. When these functions are called, the attribute structures pointed to by the + * DTD are valid, and the function is passed a pointer to the current tag structure, and an + * "element stack" which represents the state of nesting within SGML elements. + * + * The following aspects are from Dan Connolly's suggestions: Binary search, + * Structured object scheme basically, SGML content enum type. + * + * (c) Copyright CERN 1991 - See Copyright.html + * + */ +#ifndef SGML_H +#define SGML_H + +#include <HTStream.h> +#include <HTAnchor.h> +#include <LYJustify.h> + +#ifdef __cplusplus +extern "C" { +#endif +/* + * + * SGML content types + * + */ typedef enum { + SGML_EMPTY, /* No content. */ + SGML_LITTERAL, /* Literal character data. Recognize exact close tag only. + Old www server compatibility only! Not SGML */ + SGML_CDATA, /* Character data. Recognize </ only. + (But we treat it just as SGML_LITTERAL.) */ + SGML_SCRIPT, /* Like CDATA, but allow it to be a comment */ + SGML_RCDATA, /* Replaceable character data. Should recognize </ and &ref; + (but we treat it like SGML_MIXED for old times' sake). 
 */
        SGML_MIXED,             /* Elements and parsed character data.
                                   Recognize all markup. */
        SGML_ELEMENT,           /* Any data found should be regarded as an error.
                                   (But we treat it just like SGML_MIXED.) */
        SGML_PCDATA             /* Should contain no elements but &ref; is parsed.
                                   (We treat it like SGML_CDATA wrt. contained tags
                                   i.e. pass them on literally, i.e. like we should
                                   treat SGML_RCDATA) (added by KW). */
    } SGMLContent;

    typedef struct {
        const char *name;       /* The name of the attribute */
#ifdef USE_PRETTYSRC
        char type;              /* code of the type of the attribute. Code
                                   values are in HTMLDTD.h */
#endif
    } attr;

    typedef const attr *AttrList;

    typedef struct {
        const char *name;
        AttrList list;
    } AttrType;

    typedef int TagClass;

    /* textflow */
#define Tgc_FONTlike 0x00001    /* S,STRIKE,I,B,TT,U,BIG,SMALL,STYLE,BLINK;BR,TAB */
#define Tgc_EMlike 0x00002      /* EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,Q,INS,DEL,SPAN,.. */
#define Tgc_MATHlike 0x00004    /* SUB,SUP,MATH,COMMENT */
#define Tgc_Alike 0x00008       /* A */
#define Tgc_formula 0x00010     /* not used until math is supported better... */
    /* used for special structures: forms, tables,... */
#define Tgc_TRlike 0x00020      /* TR and similar */
#define Tgc_SELECTlike 0x00040  /* SELECT,INPUT,TEXTAREA(,...) */
    /* structure */
#define Tgc_FORMlike 0x00080    /* FORM itself */
#define Tgc_Plike 0x00100       /* P,H1..H6,... structures containing text or
                                   insertion but not other structures */
#define Tgc_DIVlike 0x00200     /* ADDRESS,FIG,BDO,NOTE,FN,DIV,CENTER;FIG
                                   structures which can contain other structures */
#define Tgc_LIlike 0x00400      /* LH,LI,DT,DD;TH,TD structure-like, only valid
                                   within certain other structures */
#define Tgc_ULlike 0x00800      /* UL,OL,DL,DIR,MENU;TABLE;XMP,LISTING
                                   special in some way, cannot contain (parsed)
                                   text directly */
    /* insertions */
#define Tgc_BRlike 0x01000      /* BR,IMG,TAB allowed in any text */
#define Tgc_APPLETlike 0x02000  /* APPLET,OBJECT,EMBED,SCRIPT;BUTTON */
#define Tgc_HRlike 0x04000      /* HR,MARQUEE can contain all kinds of things
                                   and/or are not allowed (?) in running text */
#define Tgc_MAPlike 0x08000     /* MAP,AREA some specials that never contain
                                   (directly or indirectly) other things than
                                   special insertions */
#define Tgc_outer 0x10000       /* HTML,FRAMESET,FRAME,PLAINTEXT; */
#define Tgc_BODYlike 0x20000    /* BODY,BODYTEXT,NOFRAMES,TEXTFLOW; */
#define Tgc_HEADstuff 0x40000   /* HEAD,BASE,STYLE,TITLE; */
    /* special relations */
#define Tgc_same 0x80000

/*
 * Groups for contains-data.
 */
#define Tgc_INLINElike (Tgc_Alike | Tgc_APPLETlike | Tgc_BRlike | Tgc_EMlike | Tgc_FONTlike | Tgc_SELECTlike)
#define Tgc_LISTlike (Tgc_LIlike | Tgc_ULlike)
#define Tgc_BLOCKlike (Tgc_DIVlike | Tgc_LISTlike)

/* Some more properties of tags (or rather, elements) and rules how
   to deal with them. - kw */
    typedef int TagFlags;

#define Tgf_endO 0x00001        /* end tag can be Omitted */
#define Tgf_startO 0x00002      /* start tag can be Omitted */
#define Tgf_mafse 0x00004       /* Make Attribute-Free Start-tag End instead
                                   (if found invalid) */
#define Tgf_strict 0x00008      /* Ignore contained invalid elements,
                                   don't pass them on; or other variant
                                   handling for some content types */
#define Tgf_nreie 0x00010       /* Not Really Empty If Empty,
                                   used by color style code */
#define Tgf_frecyc 0x00020      /* Pass element content on in a form that
                                   allows recycling, i.e. don't translate to
                                   output (display) character set yet (treat
                                   content similar to attribute values) */
#define Tgf_nolyspcl 0x00040    /* Don't generate lynx special characters
                                   for soft hyphen and various spaces (nbsp,
                                   ensp,..) */

/* A tag structure describes an SGML element.
 * -----------------------------------------
 *
 *
 * name is the string which comes after the tag opener "<".
 *
 * attributes points to a zero-terminated array
 * of attribute names.
 *
 * contents determines how the SGML engine parses the characters
 * within the element (see SGMLContent above). For the literal
 * content types, tag openers are ignored
 * except for that which opens a matching closing tag.
 *
 */
    typedef struct _tag HTTag;
    struct _tag {
        const char *name;       /* The name of the tag */
#ifdef USE_COLOR_STYLE
        unsigned name_len;      /* The length of the name */
#endif
#ifdef USE_JUSTIFY_ELTS
        BOOL can_justify;       /* justification allowed? */
#endif
        AttrList attributes;    /* The list of acceptable attributes */
        int number_of_attributes;       /* Number of possible attributes */
        const AttrType *attr_types;
        SGMLContent contents;   /* End only on end tag @@ */
        TagClass tagclass;
        TagClass contains;      /* which classes of elements this one can contain directly */
        TagClass icontains;     /* which classes of elements this one can contain indirectly */
        TagClass contained;     /* in which classes can this tag be contained ? */
        TagClass icontained;    /* in which classes can this tag be indirectly contained ? */
        TagClass canclose;      /* which classes of elements can this one close
                                   if something looks wrong ? */
        TagFlags flags;
    };

/* DTD Information
 * ---------------
 *
 * Not the whole DTD, but all this parser uses of it.
 */
    typedef struct {
        HTTag *tags;            /* Must be in strcmp order by name */
        int number_of_tags;
        STRING2PTR entity_names;        /* Must be in strcmp order by name */
        size_t number_of_entities;
        /* "entity_names" table probably unused,
         * see comments in HTMLDTD.c near the top
         */
    } SGML_dtd;

/* SGML context passed to parsers
*/
    typedef struct _HTSGMLContext *HTSGMLContext;       /* Hidden */

/*__________________________________________________________________________
*/

/*

Structured Object definition

 A structured object is something which can reasonably be represented
 in SGML. I'll rephrase that. A structured object is an ordered
 tree-structured arrangement of data which is representable as text.
 The SGML parser outputs to a Structured object. A Structured object
 can output its contents to another Structured Object. It's a kind of
 typed stream. The architecture is largely Dan Connolly's. Elements and
 entities are passed to the sob by number, implying a knowledge of the
 DTD. Knowledge of the SGML syntax is not here, though.

 Superclass: HTStream

 The creation methods will vary on the type of Structured Object.
 Maybe the callerData is enough info to pass along.

 */
    typedef struct _HTStructured HTStructured;

    typedef struct _HTStructuredClass {

        const char *name;       /* Just for diagnostics */

        void (*_free) (HTStructured * me);

        void (*_abort) (HTStructured * me, HTError e);

        void (*put_character) (HTStructured * me, int ch);

        void (*put_string) (HTStructured * me, const char *str);

        void (*put_block) (HTStructured * me, const char *str, int len);

        /* HTStreamClass ends here */

        int (*start_element) (HTStructured * me, int element_number,
                              const BOOL *attribute_present,
                              STRING2PTR attribute_value,
                              int charset,
                              char **include);

        int (*end_element) (HTStructured * me, int element_number,
                            char **include);

        int (*put_entity) (HTStructured * me, int entity_number);

    } HTStructuredClass;

/*
 Equivalents to the following functions possibly could be generalised
 into additional HTStructuredClass members. For now they don't do
 anything target-specific. - kw
 */
    extern BOOLEAN LYCheckForCSI(HTParentAnchor *anchor, char **url);
    extern void LYDoCSI(char *url, const char *comment, char **csi);
    extern BOOLEAN LYCommentHacks(HTParentAnchor *anchor, const char *comment);

/*

Find a Tag by Name

 Returns a pointer to the tag within the DTD.

 */
    extern HTTag *SGMLFindTag(const SGML_dtd * dtd,
                              const char *string);

/*
 * Return the current offset within the file that SGML is parsing
 */
    extern int SGML_offset(void);

/*

Create an SGML parser

 */
/*
 * On entry,
 * dtd must point to a DTD structure as defined above
 * callbacks must point to user routines.
 * callData is returned in callbacks transparently.
 * On exit,
 * The default tag starter has been processed.
 */
    extern HTStream *SGML_new(const SGML_dtd * dtd,
                              HTParentAnchor *anchor,
                              HTStructured * target);

    extern const HTStreamClass SGMLParser;

#ifdef __cplusplus
}
#endif
#endif /* SGML_H */
diff --git a/WWW/Library/Implementation/UCAux.h b/WWW/Library/Implementation/UCAux.h new file mode 100644 index 00000000..e0b93e60 --- /dev/null +++ b/WWW/Library/Implementation/UCAux.h @@ -0,0 +1,77 @@
/*
 * $LynxId: UCAux.h,v 1.18 2010/11/07 21:20:59 tom Exp $
 */
#ifndef UCAUX_H
#define UCAUX_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif

#ifndef UCDEFS_H
#include <UCDefs.h>
#endif /* UCDEFS_H */

#ifndef HTSTREAM_H
#include <HTStream.h>
#endif /* HTSTREAM_H */

#ifndef UCMAP_H
#include <UCMap.h>
#endif /* UCMAP_H */

#ifdef __cplusplus
extern "C" {
#endif
    extern BOOL UCCanUniTranslateFrom(int from);
    extern BOOL UCCanTranslateUniTo(int to);
    extern BOOL UCCanTranslateFromTo(int from, int to);
    extern BOOL UCNeedNotTranslate(int from,
                                   int to);

    struct _UCTransParams {
        BOOL transp;
        BOOL do_cjk;
        BOOL decode_utf8;
        BOOL output_utf8;
        BOOL use_raw_char_in;
        BOOL strip_raw_char_in;
        BOOL pass_160_173_raw;
        BOOL do_8bitraw;
        BOOL trans_to_uni;
        BOOL trans_C0_to_uni;
        BOOL repl_translated_C0;
        BOOL trans_from_uni;
        int ucs_mode;
    };
    typedef struct _UCTransParams UCTransParams;

    extern void UCSetTransParams(UCTransParams * pT, int cs_in,
                                 const LYUCcharset *p_in,
                                 int cs_out,
                                 const LYUCcharset *p_out);

    extern void UCTransParams_clear(UCTransParams * pT);

    extern void UCSetBoxChars(int cset,
                              int *pvert_out,
                              int *phori_out,
                              int vert_in,
                              int hori_in);

    /* character-output callback: receives the stream and one character */
    typedef void putc_func_t (HTStream *me,
                              int ch);

    extern BOOL UCPutUtf8_charstring(HTStream *target,
                                     putc_func_t *actions,
                                     UCode_t code);

    extern BOOL UCConvertUniToUtf8(UCode_t code,
                                   char *buffer);

    extern UCode_t UCGetUniFromUtf8String(char **ppuni);

#ifdef __cplusplus
}
#endif
#endif /* UCAUX_H */
diff --git
a/WWW/Library/Implementation/UCDefs.h b/WWW/Library/Implementation/UCDefs.h new file mode 100644 index 00000000..7555beaa --- /dev/null +++ b/WWW/Library/Implementation/UCDefs.h @@ -0,0 +1,106 @@ +/* + * $LynxId: UCDefs.h,v 1.17 2009/03/10 20:02:44 tom Exp $ + * + * Definitions for Unicode character-translations + */ + +#ifndef UCDEFS_H +#define UCDEFS_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +typedef struct _LYUCcharset { + int UChndl; /* -1 for "old" charsets, >= 0 for chartrans tables */ + + const char *MIMEname; + int enc; + int codepage; /* if positive, an IBM OS/2 specific number; + if negative, flag for no table translation */ + + /* parameters below are not used by chartrans mechanism, */ + /* they describe some relationships against built-in Latin1 charset... */ + int repertoire; /* unused */ + int codepoints; /* subset/superset of Latin1 ? */ + int cpranges; /* unused, obsolete by LYlowest_eightbit; + "which ranges have valid displayable chars + (including nbsp and shy)" */ + int like8859; /* currently used for nbsp and shy only + (but UCT_R_8859SPECL assumed for any UCT_R_8BIT...); + "for which ranges is it like 8859-1" */ +} LYUCcharset; + +typedef enum { + UCT_ENC_7BIT, + UCT_ENC_8BIT, + UCT_ENC_8859, /* no displayable chars in 0x80-0x9F */ + UCT_ENC_8BIT_C0, /* 8-bit + some chars in C0 control area */ + UCT_ENC_MAYBE2022, + UCT_ENC_CJK, + UCT_ENC_16BIT, + UCT_ENC_UTF8 +} eUCT_ENC; + +#define UCT_REP_SUBSETOF_LAT1 0x01 +#define UCT_REP_SUPERSETOF_LAT1 0x02 +#define UCT_REP_IS_LAT1 UCT_REP_SUBSETOF_LAT1 | UCT_REP_SUPERSETOF_LAT1 +/* + * Assume everything we deal with is included in the UCS2 reperoire, + * so a flag for _REP_SUBSETOF_UCS2 would be redundant. 
+ */ + +/* + * More general description how the code points relate to 8859-1 and UCS: + */ +#define UCT_CP_SUBSETOF_LAT1 0x01 /* implies UCT_CP_SUBSETOF_UCS2 */ +#define UCT_CP_SUPERSETOF_LAT1 0x02 +#define UCT_CP_SUBSETOF_UCS2 0x04 + +#define UCT_CP_IS_LAT1 UCT_CP_SUBSETOF_LAT1 | UCT_CP_SUPERSETOF_LAT1 + +/* + * More specific bitflags for practically important code point ranges: + */ +#define UCT_R_LOWCTRL 0x08 /* 0x00-0x1F, for completeness */ +#define UCT_R_7BITINV 0x10 /* invariant???, displayable 7bit chars */ +#define UCT_R_7BITNAT 0x20 /* displayable 7bit, national??? */ +#define UCT_R_HIGHCTRL 0x40 /* chars in 0x80-0x9F range */ +#define UCT_R_8859SPECL 0x80 /* special chars in 8859-x sets: nbsp and shy */ +#define UCT_R_HIGH8BIT 0x100 /* rest of 0xA0-0xFF range */ + +#define UCT_R_ASCII UCT_R_7BITINV | UCT_R_7BITNAT /* displayable US-ASCII */ +#define UCT_R_LAT1 UCT_R_ASCII | UCT_R_8859SPECL | UCT_R_HIGH8BIT +#define UCT_R_8BIT UCT_R_LAT1 | UCT_R_HIGHCTRL /* full 8bit range */ + +/* + * For the following some comments are in HTAnchor.c. 
+ */ +typedef enum { + UCT_STAGE_MIME, + UCT_STAGE_PARSER, /* What the parser (SGML.c) gets to see */ + UCT_STAGE_STRUCTURED, /* What the structured stream (HTML) gets fed */ + UCT_STAGE_HTEXT, /* What gets fed to the HText_* functions */ + UCT_STAGEMAX +} eUCT_STAGE; + +typedef enum { + UCT_SETBY_NONE, + UCT_SETBY_DEFAULT, + UCT_SETBY_LINK, /* set by A or LINK CHARSET= hint */ + UCT_SETBY_STRUCTURED, /* structured stream stage (HTML.c) */ + UCT_SETBY_PARSER, /* set by SGML parser or similar */ + UCT_SETBY_MIME /* set explicitly by MIME charset parameter */ +} eUCT_SETBY; + +typedef struct _UCStageInfo { + int lock; /* by what it has been set */ + int LYhndl; + LYUCcharset C; +} UCStageInfo; + +typedef struct _UCAnchorInfo { + struct _UCStageInfo s[UCT_STAGEMAX]; +} UCAnchorInfo; + +#endif /* UCDEFS_H */ diff --git a/WWW/Library/Implementation/UCMap.h b/WWW/Library/Implementation/UCMap.h new file mode 100644 index 00000000..b55fa6bd --- /dev/null +++ b/WWW/Library/Implementation/UCMap.h @@ -0,0 +1,110 @@ +/* + * $LynxId: UCMap.h,v 1.27 2011/12/01 02:00:57 tom Exp $ + */ +#ifndef UCMAP_H +#define UCMAP_H + +#ifndef HTUTILS_H +#include <HTUtils.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + typedef enum { + ucError = -1, + ucZeroWidth = -2, + ucInvalidHash = -3, + ucNotFound = -4, + ucNeedMore = -10, + ucCannotConvert = -11, + ucCannotOutput = -12, + ucBufferTooSmall = -13, + ucUnknown = -14 + } UCStatus; + + typedef long UCode_t; + + extern BOOL UCScanCode(UCode_t *, const char *, BOOL); + + extern int UCTransUniChar(UCode_t unicode, + int charset_out); + extern int UCTransUniCharStr(char *outbuf, + int buflen, + UCode_t unicode, + int charset_out, + int chk_single_flag); + extern int UCTransChar(int ch_in, + int charset_in, + int charset_out); + extern int UCReverseTransChar(int ch_out, + int charset_in, + int charset_out); + extern int UCTransCharStr(char *outbuf, + int buflen, + int ch_in, + int charset_in, + int charset_out, + int chk_single_flag); 
#ifdef EXP_JAPANESEUTF8_SUPPORT
    extern UCode_t UCTransJPToUni(char *inbuf,
                                  int buflen,
                                  int charset_in);
#endif
    extern UCode_t UCTransToUni(int ch_in,
                                int charset_in);
    extern int UCGetRawUniMode_byLYhndl(int i);
    extern int UCGetLYhndl_byMIME(const char *p);       /* returns -1 if name not recognized */
    extern int safeUCGetLYhndl_byMIME(const char *p);   /* returns LATIN1 if name not recognized */

#ifdef USE_LOCALE_CHARSET
    extern void LYFindLocaleCharset(void);
#endif

    extern int UCLYhndl_for_unspec;
    extern int UCLYhndl_for_unrec;
    extern int UCLYhndl_HTFile_for_unspec;
    extern int UCLYhndl_HTFile_for_unrec;

/* easy to type: */
    extern int LATIN1;          /* UCGetLYhndl_byMIME("iso-8859-1") */
    extern int US_ASCII;        /* UCGetLYhndl_byMIME("us-ascii") */
    extern int UTF8_handle;     /* UCGetLYhndl_byMIME("utf-8") */

#undef TRANSPARENT              /* defined on Solaris in <sys/stream.h> */
    extern int TRANSPARENT;     /* UCGetLYhndl_byMIME("x-transparent") */

/*
In general, Lynx translates letters from the document charset to the display
charset. If the document charset is not specified or not recognized by Lynx,
we fall back to the different assumptions below; see also lynx.cfg for info.

UCLYhndl_for_unspec - assume this as charset for documents that don't
 specify a charset parameter in HTTP headers or via META;
 this corresponds to "assume_charset"

UCLYhndl_HTFile_for_unspec - assume this as charset of local files;
 this corresponds to "assume_local_charset"

UCLYhndl_for_unrec - in case a charset parameter is not recognized;
 this corresponds to "assume_unrec_charset"

UCLYhndl_HTFile_for_unrec - the same but only for local files,
 currently not used.

current_char_set - this corresponds to "display charset",
 declared in LYCharSets.c and really important.

All external charset information is available in so-called MIME format.
For internal needs Lynx uses charset handles, which are integers
from UCGetLYhndl_byMIME(). However, there is no way to recover
from a user's error in the configuration file lynx.cfg or in command line
switches: those unrecognized MIME names are assumed to be LATIN1
(via safeUCGetLYhndl...).
*/

#define UCTRANS_NOTFOUND (-4)

#ifdef __cplusplus
}
#endif
#endif /* UCMAP_H */
diff --git a/WWW/Library/Implementation/Version.make b/WWW/Library/Implementation/Version.make new file mode 100644 index 00000000..4b4b380f --- /dev/null +++ b/WWW/Library/Implementation/Version.make @@ -0,0 +1 @@
VC = 2.14
diff --git a/WWW/Library/Implementation/dtd_util.c b/WWW/Library/Implementation/dtd_util.c new file mode 100644 index 00000000..4e18fdf7 --- /dev/null +++ b/WWW/Library/Implementation/dtd_util.c @@ -0,0 +1,1405 @@
/*
 * $LynxId: dtd_util.c,v 1.76 2010/09/25 00:30:23 tom Exp $
 *
 * Given a SGML_dtd structure, write a corresponding flat file, or "C" source.
 * Given the flat-file, write the "C" source.
 *
 * TODO: use symbols for HTMLA_NORMAL, etc.
 */

#include <HTUtils.h>
#include <HTMLDTD.h>
#include <string.h>

/*
 * Tweaks to build standalone.
 */
#undef exit

BOOLEAN WWW_TraceFlag = FALSE;

/* trace output of this standalone utility goes to stderr */
FILE *TraceFP(void)
{
    return stderr;
}

/*
 * Begin the actual utility.
+ */ +#define GETOPT "chl:o:ts" + +#define NOTE(message) fprintf(output, message "\n"); +/* *INDENT-OFF* */ +#ifdef USE_PRETTYSRC +# define N HTMLA_NORMAL +# define i HTMLA_ANAME +# define h HTMLA_HREF +# define c HTMLA_CLASS +# define x HTMLA_AUXCLASS +# define T(t) , t +#else +# define T(t) /*nothing */ +#endif + +#define ATTR_TYPE(name) { #name, name##_attr_list } + +static const attr core_attr_list[] = { + { "CLASS" T(c) }, + { "ID" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr i18n_attr_list[] = { + { "DIR" T(N) }, + { "LANG" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr events_attr_list[] = { + { "ONCLICK" T(N) }, + { "ONDBLCLICK" T(N) }, + { "ONKEYDOWN" T(N) }, + { "ONKEYPRESS" T(N) }, + { "ONKEYUP" T(N) }, + { "ONMOUSEDOWN" T(N) }, + { "ONMOUSEMOVE" T(N) }, + { "ONMOUSEOUT" T(N) }, + { "ONMOUSEOVER" T(N) }, + { "ONMOUSEUP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr align_attr_list[] = { + { "ALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr cellalign_attr_list[] = { + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "VALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr bgcolor_attr_list[] = { + { "BGCOLOR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +#undef T +/* *INDENT-ON* */ + +static void failed(const char *s) +{ + perror(s); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + static const char *tbl[] = + { + "Usage: dtd_util [options]", + "", + "Options:", + " -c generate C-source" + " -h generate C-header" + " -l load", + " -o filename specify output (default: stdout)", + " -s strict (HTML DTD 0)", + " -t tagsoup (HTML DTD 1)", + }; + unsigned n; + + for (n = 0; n < TABLESIZE(tbl); ++n) { + fprintf(stderr, "%s\n", tbl[n]); + } + exit(EXIT_FAILURE); +} + +static const char *SGMLContent2s(SGMLContent contents) +{ + char *value = "?"; + + switch (contents) { + case SGML_EMPTY: + 
value = "SGML_EMPTY"; + break; + case SGML_LITTERAL: + value = "SGML_LITTERAL"; + break; + case SGML_CDATA: + value = "SGML_CDATA"; + break; + case SGML_SCRIPT: + value = "SGML_SCRIPT"; + break; + case SGML_RCDATA: + value = "SGML_RCDATA"; + break; + case SGML_MIXED: + value = "SGML_MIXED"; + break; + case SGML_ELEMENT: + value = "SGML_ELEMENT"; + break; + case SGML_PCDATA: + value = "SGML_PCDATA"; + break; + } + return value; +} + +static SGMLContent s2SGMLContent(const char *value) +{ + static SGMLContent table[] = + { + SGML_EMPTY, + SGML_LITTERAL, + SGML_CDATA, + SGML_SCRIPT, + SGML_RCDATA, + SGML_MIXED, + SGML_ELEMENT, + SGML_PCDATA + }; + unsigned n; + SGMLContent result = SGML_EMPTY; + + for (n = 0; n < TABLESIZE(table); ++n) { + if (!strcmp(SGMLContent2s(table[n]), value)) { + result = table[n]; + break; + } + } + return result; +} + +static void PrintF(FILE *, int, const char *,...) GCC_PRINTFLIKE(3, 4); + +static void PrintF(FILE *output, int width, const char *fmt,...) +{ + char buffer[BUFSIZ]; + va_list ap; + + va_start(ap, fmt); + vsprintf(buffer, fmt, ap); + va_end(ap); + + fprintf(output, "%-*s", width, buffer); +} + +static int same_AttrList(AttrList a, AttrList b) +{ + int result = 1; + + if (a && b) { + while (a->name && b->name) { + if (strcmp(a->name, b->name)) { + result = 0; + break; + } + ++a, ++b; + } + if (a->name || b->name) + result = 0; + } else { + result = 0; + } + return result; +} + +static int first_attrs(const SGML_dtd * dtd, int which) +{ + int check; + int result = TRUE; + + for (check = 0; check < which; ++check) { + if (dtd->tags[check].attributes == dtd->tags[which].attributes) { + result = FALSE; + break; + } else if (same_AttrList(dtd->tags[check].attributes, + dtd->tags[which].attributes)) { + result = FALSE; + dtd->tags[which].attributes = dtd->tags[check].attributes; + break; + } + } + return result; +} + +static char *no_dashes(char *target, const char *source) +{ + int j; + + for (j = 0; (target[j] = source[j]) != '\0'; 
++j) { + if (!isalnum(target[j])) + target[j] = '_'; + } + return target; +} + +/* the second "OBJECT" is treated specially */ +static int first_object(const SGML_dtd * dtd, int which) +{ + int check; + + for (check = 0; check <= which; ++check) { + if (!strcmp(dtd->tags[check].name, "OBJECT")) + break; + } + return (check == which); +} + +static const char *NameOfAttrs(const SGML_dtd * dtd, int which) +{ + int check; + const char *result = dtd->tags[which].name; + + for (check = 0; check < which; ++check) { + if (dtd->tags[check].attributes == dtd->tags[which].attributes) { + result = dtd->tags[check].name; + break; + } + } + /* special cases to match existing headers */ + if (!strcmp(result, "ABBR")) + result = "GEN"; + else if (!strcmp(result, "BLOCKQUOTE")) + result = "BQ"; + else if (!strcmp(result, "BASEFONT")) + result = "FONT"; + else if (!strcmp(result, "CENTER")) + result = "DIV"; + else if (!strcmp(result, "DIR")) + result = "UL"; + else if (!strcmp(result, "H1")) + result = "H"; + else if (!strcmp(result, "TBODY")) + result = "TR"; + return result; +} + +static const char *DEF_name(const SGML_dtd * dtd, int which) +{ + const char *result = dtd->tags[which].name; + + if (!strcmp(result, "OBJECT") && !first_object(dtd, which)) + result = "OBJECT_PCDATA"; + return result; +} + +typedef struct { + const char *name; + const attr *attrs; + int count; + int which; +} AttrInfo; + +static int compare_attr_types(const void *a, const void *b) +{ + const AttrType *p = (const AttrType *) a; + const AttrType *q = (const AttrType *) b; + int result = 0; + + /* keep lowercase AttrType lists before uppercase, since latter are derived */ + if (isupper(p->name[0]) ^ isupper(q->name[0])) { + if (isupper(p->name[0])) { + result = 1; + } else { + result = -1; + } + } else { + result = strcmp(p->name, q->name); + } + return result; +} + +static int len_AttrTypes(const AttrType * data) +{ + int result = 0; + + for (result = 0; data[result].name != 0; ++result) { + ; + } + 
return result; +} + +static AttrType *sorted_AttrTypes(const AttrType * source) +{ + AttrType *result = 0; + unsigned number = len_AttrTypes(source); + + if (number != 0) { + result = typecallocn(AttrType, number + 1); + if (result != 0) { + MemCpy(result, source, number * sizeof(*result)); + qsort(result, number, sizeof(*result), compare_attr_types); + } + } + + return result; +} + +static int compare_attr(const void *a, const void *b) +{ + const AttrInfo *p = (const AttrInfo *) a; + const AttrInfo *q = (const AttrInfo *) b; + + return strcmp(p->name, q->name); +} + +static int len_AttrList(AttrList data) +{ + int result = 0; + + for (result = 0; data[result].name != 0; ++result) { + ; + } + return result; +} + +static void sort_uniq_AttrList(attr * data) +{ + unsigned have = len_AttrList(data); + unsigned j, k; + + qsort(data, have, sizeof(*data), compare_attr); + /* + * Eliminate duplicates + */ + for (j = 0; j < have; ++j) { + for (k = j; data[k].name; ++k) { + if (data[k + 1].name == 0) + break; + if (strcmp(data[j].name, data[k + 1].name)) { + break; + } + } + data[j] = data[k]; + } + memset(data + j, 0, sizeof(data[0])); +} + +static attr *copy_AttrList(AttrList data) +{ + unsigned need = len_AttrList(data); + unsigned n; + + attr *result = (attr *) calloc(need + 1, sizeof(attr)); + + for (n = 0; n < need; ++n) + result[n] = data[n]; + sort_uniq_AttrList(result); + return result; +} + +static attr *merge_AttrLists(const AttrType * data) +{ + const AttrType *at; + attr *result = 0; + unsigned need = 1; + unsigned have = 0; + unsigned j; + + for (at = data; at->name; ++at) { + need += len_AttrList(at->list); + } + result = (attr *) calloc(need + 1, sizeof(attr)); + for (at = data; at->name; ++at) { + if (!strcmp(at->name, "events")) { + ; /* lynx does not use events */ + } else { + for (j = 0; at->list[j].name; ++j) { + result[have++] = at->list[j]; + } + } + } + sort_uniq_AttrList(result); + return result; +} + +static int clean_AttrList(attr * target, 
AttrList source) +{ + int result = 0; + int j, k; + + for (j = 0; target[j].name != 0; ++j) { + for (k = 0; source[k].name != 0; ++k) { + if (!strcmp(target[j].name, source[k].name)) { + k = j--; + for (;;) { + target[k] = target[k + 1]; + if (target[k++].name == 0) + break; + } + ++result; + break; + } + } + } + return result; +} + +/* + * Actually COUNT the number of attributes, to make it possible to edit a + * attribute-table in src0_HTMLDTD.h and have all of the files updated by + * just doing a "make sources". + */ +static int AttrCount(HTTag * tag) +{ + return len_AttrList(tag->attributes); +} + +static AttrInfo *sorted_attrs(const SGML_dtd * dtd, unsigned *countp) +{ + int j; + + AttrInfo *data = (AttrInfo *) calloc(dtd->number_of_tags, sizeof(AttrInfo)); + unsigned count = 0; + + /* get the attribute-data */ + for (j = 0; j < dtd->number_of_tags; ++j) { + if (first_attrs(dtd, j)) { + data[count].name = NameOfAttrs(dtd, j); + data[count].attrs = dtd->tags[j].attributes; + data[count].count = AttrCount(&(dtd->tags[j])); + data[count].which = j; + ++count; + } + } + /* sort the data by the name of their associated tag */ + qsort(data, count, sizeof(*data), compare_attr); + *countp = count; + return data; +} + +static void dump_src_HTTag_Defines(FILE *output, const SGML_dtd * dtd, int which) +{ + HTTag *tag = &(dtd->tags[which]); + +#define myFMT "0x%05X" + fprintf(output, + "#define T_%-13s " + myFMT "," myFMT "," myFMT "," myFMT "," myFMT "," myFMT + "," myFMT "\n", + DEF_name(dtd, which), + tag->tagclass, + tag->contains, + tag->icontains, + tag->contained, + tag->icontained, + tag->canclose, + tag->flags); +} + +static void dump_AttrItem(FILE *output, const attr * data) +{ + char buffer[BUFSIZ]; + char pretty = 'N'; + + sprintf(buffer, "\"%s\"", data->name); +#ifdef USE_PRETTYSRC + switch (data->type) { + case HTMLA_NORMAL: + pretty = 'N'; + break; + case HTMLA_ANAME: + pretty = 'i'; + break; + case HTMLA_HREF: + pretty = 'h'; + break; + case HTMLA_CLASS: 
+ pretty = 'c'; + break; + case HTMLA_AUXCLASS: + pretty = 'x'; + break; + } +#endif + fprintf(output, "\t{ %-15s T(%c) },\n", buffer, pretty); +} + +static void dump_AttrItem0(FILE *output) +{ + fprintf(output, "\t{ 0 T(N) }\t/* Terminate list */\n"); +} + +static void dump_src_AttrType(FILE *output, const char *name, AttrList data, const char **from) +{ + int n; + + fprintf(output, "static const attr %s_attr_list[] = {\n", name); + if (data != 0) { + for (n = 0; data[n].name != 0; ++n) { + dump_AttrItem(output, data + n); + } + } + fprintf(output, "\t{ 0 T(N) } /* Terminate list */\n"); + fprintf(output, "};\n"); + NOTE(""); + fprintf(output, "static const AttrType %s_attr_type[] = {\n", name); + if (from != 0) { + while (*from != 0) { + fprintf(output, "\t{ ATTR_TYPE(%s) },\n", *from); + ++from; + } + } else { + fprintf(output, "\t{ ATTR_TYPE(%s) },\n", name); + } + fprintf(output, "\t{ 0, 0 },\n"); + fprintf(output, "};\n"); + NOTE(""); +} + +static void dump_src_HTTag_Attrs(FILE *output, const SGML_dtd * dtd, int which) +{ + HTTag *tag = &(dtd->tags[which]); + attr *list = merge_AttrLists(tag->attr_types); + char buffer[BUFSIZ]; + int n; + int limit = len_AttrList(list); + + sprintf(buffer, "static const attr %s_attr[] = {", NameOfAttrs(dtd, which)); + fprintf(output, + "%-40s/* %s attributes */\n", buffer, tag->name); + for (n = 0; n < limit; ++n) { + dump_AttrItem(output, list + n); + } + dump_AttrItem0(output); + fprintf(output, "};\n"); + NOTE(""); + free(list); +} + +static void dump_src_HTTag(FILE *output, const SGML_dtd * dtd, int which) +{ + HTTag *tag = &(dtd->tags[which]); + char *P_macro = "P"; + +#ifdef USE_JUSTIFY_ELTS + if (!tag->can_justify) + P_macro = "P0"; +#endif + PrintF(output, 19, " { %s(%s),", P_macro, tag->name); + PrintF(output, 24, "ATTR_DATA(%s), ", NameOfAttrs(dtd, which)); + PrintF(output, 14, "%s,", SGMLContent2s(tag->contents)); + fprintf(output, "T_%s", DEF_name(dtd, which)); + fprintf(output, "},\n"); +} + +static void 
dump_source(FILE *output, const SGML_dtd * dtd, int dtd_version) +{ + static AttrType generic_types[] = + { + ATTR_TYPE(core), + ATTR_TYPE(i18n), + ATTR_TYPE(events), + ATTR_TYPE(align), + ATTR_TYPE(cellalign), + ATTR_TYPE(bgcolor), + {0, 0} + }; + AttrType *gt; + + const char *marker = "src_HTMLDTD_H"; + int j; + + unsigned count = 0; + AttrInfo *data = sorted_attrs(dtd, &count); + + fprintf(output, "/* %cLynxId%c */\n", '$', '$'); + fprintf(output, "#ifndef %s%d\n", marker, dtd_version); + fprintf(output, "#define %s%d 1\n\n", marker, dtd_version); + + /* + * If we ifdef this for once, and make the table names distinct, we can + * #include the strict- and tagsoup-output directly in HTMLDTD.c + */ + NOTE("#ifndef once_HTMLDTD"); + NOTE("#define once_HTMLDTD 1"); + NOTE(""); + + /* construct TagClass-define's */ + for (j = 0; j <= dtd->number_of_tags; ++j) { + dump_src_HTTag_Defines(output, dtd, j); + } + NOTE("#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000"); + + /* construct attribute-tables */ + NOTE("#ifdef USE_PRETTYSRC"); + NOTE("# define N HTMLA_NORMAL"); + NOTE("# define i HTMLA_ANAME"); + NOTE("# define h HTMLA_HREF"); + NOTE("# define c HTMLA_CLASS"); + NOTE("# define x HTMLA_AUXCLASS"); + NOTE("# define T(t) , t"); + NOTE("#else"); + NOTE("# define T(t) /*nothing */"); + NOTE("#endif"); + NOTE("/* *INDENT-OFF* */"); + NOTE(""); + NOTE("#define ATTR_TYPE(name) #name, name##_attr_list"); + NOTE(""); + NOTE("/* generic attributes, used in different tags */"); + for (gt = generic_types; gt->name != 0; ++gt) { + dump_src_AttrType(output, gt->name, gt->list, 0); + } + NOTE(""); + NOTE("/* tables defining attributes per-tag in terms of generic attributes (editable) */"); + for (j = 0; j < (int) count; ++j) { + int which = data[j].which; + + if (first_attrs(dtd, which)) { + HTTag *tag = &(dtd->tags[which]); + const AttrType *types = tag->attr_types; + const char *name = NameOfAttrs(dtd, which); + attr *list = 0; + const char 
*from_attr[10]; + int from_size = 0; + + while (types->name != 0) { + from_attr[from_size++] = types->name; + if (!strcmp(types->name, name)) { + list = copy_AttrList(types->list); + for (gt = generic_types; gt->name != 0; ++gt) { + if (clean_AttrList(list, gt->list)) { + int k; + int found = 0; + + for (k = 0; k < from_size; ++k) { + if (!strcmp(from_attr[k], gt->name)) { + found = 1; + break; + } + } + if (!found) + from_attr[from_size++] = gt->name; + break; + } + } + } + ++types; + } + from_attr[from_size] = 0; + + if (list != 0) { + dump_src_AttrType(output, name, list, from_attr); + free(list); + } + } + } + NOTE(""); + NOTE("/* attribute lists for the runtime (generated by dtd_util) */"); + for (j = 0; j < (int) count; ++j) { + dump_src_HTTag_Attrs(output, dtd, data[j].which); + } + NOTE("/* *INDENT-ON* */"); + NOTE(""); + NOTE("/* justification-flags */"); + NOTE("#undef N"); + NOTE("#undef i"); + NOTE("#undef h"); + NOTE("#undef c"); + NOTE("#undef x"); + NOTE(""); + NOTE("#undef T"); + NOTE(""); + NOTE("/* tag-names */"); + for (j = 0; j <= dtd->number_of_tags; ++j) { + fprintf(output, "#undef %s\n", DEF_name(dtd, j)); + } + NOTE(""); + NOTE("/* these definitions are used in the tags-tables */"); + NOTE("#undef P"); + NOTE("#undef P_"); + NOTE("#ifdef USE_COLOR_STYLE"); + NOTE("#define P_(x) #x, (sizeof #x) -1"); + NOTE("#define NULL_HTTag_ NULL, 0"); + NOTE("#else"); + NOTE("#define P_(x) #x"); + NOTE("#define NULL_HTTag_ NULL"); + NOTE("#endif"); + NOTE(""); + NOTE("#ifdef USE_JUSTIFY_ELTS"); + NOTE("#define P(x) P_(x), 1"); + NOTE("#define P0(x) P_(x), 0"); + NOTE("#define NULL_HTTag NULL_HTTag_,0"); + NOTE("#else"); + NOTE("#define P(x) P_(x)"); + NOTE("#define P0(x) P_(x)"); + NOTE("#define NULL_HTTag NULL_HTTag_"); + NOTE("#endif"); + NOTE(""); + NOTE("#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type"); + NOTE(""); + NOTE("#endif /* once_HTMLDTD */"); + NOTE("/* *INDENT-OFF* */"); + + /* construct the tags table */ + 
fprintf(output, + "static const HTTag tags_table%d[HTML_ALL_ELEMENTS] = {\n", + dtd_version); + for (j = 0; j <= dtd->number_of_tags; ++j) { + if (j == dtd->number_of_tags) { + NOTE("/* additional (alternative variants), not counted in HTML_ELEMENTS: */"); + NOTE("/* This one will be used as a temporary substitute within the parser when"); + NOTE(" it has been signalled to parse OBJECT content as MIXED. - kw */"); + } + dump_src_HTTag(output, dtd, j); + } + fprintf(output, "};\n"); + + NOTE("/* *INDENT-ON* */"); + NOTE(""); + fprintf(output, "#endif /* %s%d */\n", marker, dtd_version); + + free(data); +} + +static void dump_hdr_attr(FILE *output, AttrInfo * data) +{ + int j; + char buffer[BUFSIZ]; + + for (j = 0; j < data->count; ++j) { + PrintF(output, 33, "#define HTML_%s_%s", + data->name, + no_dashes(buffer, data->attrs[j].name)); + fprintf(output, "%2d\n", j); + } + PrintF(output, 33, "#define HTML_%s_ATTRIBUTES", data->name); + fprintf(output, "%2d\n", data->count); + fprintf(output, "\n"); +} + +static void dump_header(FILE *output, const SGML_dtd * dtd) +{ + const char *marker = "hdr_HTMLDTD_H"; + int j; + + unsigned count = 0; + AttrInfo *data = sorted_attrs(dtd, &count); + + fprintf(output, "/* %cLynxId%c */\n", '$', '$'); + fprintf(output, "#ifndef %s\n", marker); + fprintf(output, "#define %s 1\n\n", marker); + + NOTE("#ifdef __cplusplus"); + NOTE("extern \"C\" {"); + NOTE("#endif"); + + NOTE("/*"); + NOTE(""); + NOTE(" Element Numbers"); + NOTE(""); + NOTE(" Must Match all tables by element!"); + NOTE(" These include tables in HTMLDTD.c"); + NOTE(" and code in HTML.c."); + NOTE(""); + NOTE(" */"); + + fprintf(output, " typedef enum {\n"); + for (j = 0; j < dtd->number_of_tags; ++j) { + fprintf(output, "\tHTML_%s,\n", dtd->tags[j].name); + } + NOTE("\tHTML_ALT_OBJECT"); + NOTE(" } HTMLElement;\n"); + NOTE("/* Notes: HTML.c uses a different extension of the"); + NOTE(" HTML_ELEMENTS space privately, see"); + NOTE(" HTNestedList.h."); + NOTE(""); + NOTE(" 
Do NOT replace HTML_ELEMENTS with"); + NOTE(" TABLESIZE(mumble_dtd.tags)."); + NOTE(""); + NOTE(" Keep the following defines in synch with"); + NOTE(" the above enum!"); + NOTE(" */"); + NOTE(""); + NOTE("/* # of elements generally visible to Lynx code */"); + fprintf(output, "#define HTML_ELEMENTS %d\n", dtd->number_of_tags); + NOTE(""); + NOTE("/* # of elements visible to SGML parser */"); + fprintf(output, "#define HTML_ALL_ELEMENTS %d\n", dtd->number_of_tags + 1); + NOTE(""); + NOTE("/*"); + NOTE(""); + NOTE(" Attribute numbers"); + NOTE(""); + NOTE(" Identifier is HTML_<element>_<attribute>."); + NOTE(" These must match the tables in HTML.c!"); + NOTE(""); + NOTE(" */"); + + /* output the sorted list */ + for (j = 0; j < (int) count; ++j) { + dump_hdr_attr(output, data + j); + } + free(data); + + NOTE("#ifdef __cplusplus"); + NOTE("}"); + NOTE("#endif"); + + fprintf(output, "#endif\t\t\t\t/* %s */\n", marker); +} + +#define FMT_NUM_ATTRS "%d attributes:\n" +#define FMT_ONE_ATTR "%d:%d:%s\n" +#define NUM_ONE_ATTR 3 + +static void dump_flat_attrs(FILE *output, + const attr * attributes, + int number_of_attributes) +{ + int n; + + fprintf(output, "\t\t" FMT_NUM_ATTRS, number_of_attributes); + for (n = 0; n < number_of_attributes; ++n) { + fprintf(output, "\t\t\t" FMT_ONE_ATTR, n, +#ifdef USE_PRETTYSRC + attributes[n].type, +#else + 0, /* need placeholder for source-compat */ +#endif + attributes[n].name + ); + } +} + +static void dump_flat_attr_types(FILE *output, const AttrType * attr_types) +{ + const AttrType *p = sorted_AttrTypes(attr_types); + int number = len_AttrTypes(attr_types); + + fprintf(output, "\t\t%d attr_types\n", number); + + if (p != 0) { + while (p->name != 0) { + fprintf(output, "\t\t\t%s\n", p->name); + ++p; + } + } +} + +static void dump_flat_SGMLContent(FILE *output, const char *name, SGMLContent contents) +{ + fprintf(output, "\t\t%s: %s\n", name, SGMLContent2s(contents)); +} + +#define DUMP(name) \ + if (theClass & Tgc_##name) {\ + 
fprintf(output, " " #name); \ + theClass &= ~(Tgc_##name); \ + } + +static void dump_flat_TagClass(FILE *output, const char *name, TagClass theClass) +{ + fprintf(output, "\t\t%s:", name); + DUMP(FONTlike); + DUMP(EMlike); + DUMP(MATHlike); + DUMP(Alike); + DUMP(formula); + DUMP(TRlike); + DUMP(SELECTlike); + DUMP(FORMlike); + DUMP(Plike); + DUMP(DIVlike); + DUMP(LIlike); + DUMP(ULlike); + DUMP(BRlike); + DUMP(APPLETlike); + DUMP(HRlike); + DUMP(MAPlike); + DUMP(outer); + DUMP(BODYlike); + DUMP(HEADstuff); + DUMP(same); + if (theClass) + fprintf(output, " OOPS:%#x", theClass); + fprintf(output, "\n"); +} + +#undef DUMP + +#define DUMP(name) \ + if (theFlags & Tgf_##name) {\ + fprintf(output, " " #name); \ + theFlags &= ~(Tgf_##name); \ + } + +static void dump_flat_TagFlags(FILE *output, const char *name, TagFlags theFlags) +{ + fprintf(output, "\t\t%s:", name); + DUMP(endO); + DUMP(startO); + DUMP(mafse); + DUMP(strict); + DUMP(nreie); + DUMP(frecyc); + DUMP(nolyspcl); + if (theFlags) + fprintf(output, " OOPS:%#x", theFlags); + fprintf(output, "\n"); +} + +#undef DUMP + +static void dump_flat_HTTag(FILE *output, unsigned n, HTTag * tag) +{ + fprintf(output, "\t%u:%s\n", n, tag->name); +#ifdef USE_JUSTIFY_ELTS + fprintf(output, "\t\t%s\n", tag->can_justify ? 
"justify" : "nojustify"); +#endif + dump_flat_attrs(output, tag->attributes, AttrCount(tag)); + dump_flat_attr_types(output, tag->attr_types); + dump_flat_SGMLContent(output, "contents", tag->contents); + dump_flat_TagClass(output, "tagclass", tag->tagclass); + dump_flat_TagClass(output, "contains", tag->contains); + dump_flat_TagClass(output, "icontains", tag->icontains); + dump_flat_TagClass(output, "contained", tag->contained); + dump_flat_TagClass(output, "icontained", tag->icontained); + dump_flat_TagClass(output, "canclose", tag->canclose); + dump_flat_TagFlags(output, "flags", tag->flags); +} + +static int count_attr_types(AttrType * attr_types, HTTag * tag) +{ + int count = 0; + const AttrType *p; + AttrType *q; + + if ((p = tag->attr_types) != 0) { + while (p->name != 0) { + if ((q = attr_types) != 0) { + while (q->name != 0) { + if (!strcmp(q->name, p->name)) { + --count; + break; + } + ++q; + } + *q = *p; + } + ++count; + ++p; + } + } + return count; +} + +static void dump_flatfile(FILE *output, const SGML_dtd * dtd) +{ + AttrType *attr_types = 0; + int pass; + unsigned count = 0; + unsigned n; + + /* merge all of the attr_types data */ + for (pass = 0; pass < 2; ++pass) { + for (n = 0; (int) n < dtd->number_of_tags; ++n) { + count += count_attr_types(attr_types, &(dtd->tags[n])); + } + if (pass == 0) { + attr_types = typecallocn(AttrType, count + 1); + count = 0; + } else { + count = len_AttrTypes(attr_types); + qsort(attr_types, count, sizeof(*attr_types), compare_attr_types); + fprintf(output, "%d attr_types\n", count); + for (n = 0; n < count; ++n) { + fprintf(output, "\t%d:%s\n", n, attr_types[n].name); + dump_flat_attrs(output, attr_types[n].list, + len_AttrList(attr_types[n].list)); + } + } + } + + fprintf(output, "%d tags\n", dtd->number_of_tags); + for (n = 0; (int) n < dtd->number_of_tags; ++n) { + dump_flat_HTTag(output, n, &(dtd->tags[n])); + } +#if 0 + fprintf(output, "%d entities\n", dtd->number_of_entities); + for (n = 0; n < 
dtd->number_of_entities; ++n) { + } +#endif +} + +static char *get_line(FILE *input) +{ + char temp[1024]; + char *result = 0; + + if (fgets(temp, (int) sizeof(temp), input) != 0) { + result = strdup(temp); + } + return result; +} + +#define LOAD(name) \ + if (!strcmp(data, #name)) {\ + *theClass |= Tgc_##name; \ + continue; \ + } + +static int load_flat_TagClass(FILE *input, const char *name, TagClass * theClass) +{ + char prefix[80]; + char *next = get_line(input); + char *data; + int result = 0; + + *theClass = 0; + if (next != 0) { + sprintf(prefix, "\t\t%s:", name); + data = strtok(next, "\n "); + + if (data != 0 && !strcmp(data, prefix)) { + result = 1; + + while ((data = strtok(NULL, "\n ")) != 0) { + + LOAD(FONTlike); + LOAD(EMlike); + LOAD(MATHlike); + LOAD(Alike); + LOAD(formula); + LOAD(TRlike); + LOAD(SELECTlike); + LOAD(FORMlike); + LOAD(Plike); + LOAD(DIVlike); + LOAD(LIlike); + LOAD(ULlike); + LOAD(BRlike); + LOAD(APPLETlike); + LOAD(HRlike); + LOAD(MAPlike); + LOAD(outer); + LOAD(BODYlike); + LOAD(HEADstuff); + LOAD(same); + + fprintf(stderr, "Unexpected TagClass '%s'\n", data); + result = 0; + break; + } + } else if (data) { + fprintf(stderr, "load_flat_TagClass: '%s' vs '%s'\n", data, prefix); + } + free(next); + } else { + fprintf(stderr, "Did not find contents\n"); + } + return result; +} + +#undef LOAD + +#define LOAD(name) \ + if (!strcmp(data, #name)) {\ + *flags |= Tgf_##name; \ + continue; \ + } + +static int load_flat_TagFlags(FILE *input, const char *name, TagFlags * flags) +{ + char prefix[80]; + char *next = get_line(input); + char *data; + int result = 0; + + *flags = 0; + if (next != 0) { + sprintf(prefix, "\t\t%s:", name); + data = strtok(next, "\n "); + + if (data != 0 && !strcmp(data, prefix)) { + result = 1; + + while ((data = strtok(NULL, "\n ")) != 0) { + + LOAD(endO); + LOAD(startO); + LOAD(mafse); + LOAD(strict); + LOAD(nreie); + LOAD(frecyc); + LOAD(nolyspcl); + + fprintf(stderr, "Unexpected TagFlag '%s'\n", data); + result = 
0; + break; + } + } else if (data) { + fprintf(stderr, "load_flat_TagFlags: '%s' vs '%s'\n", data, prefix); + } + free(next); + } + return result; +} + +#undef LOAD + +static int load_flat_AttrList(FILE *input, AttrList * attrs, int *length) +{ + attr *attributes; + int j, jcmp, code; + int result = 1; + char name[1024]; + +#ifdef USE_PRETTYSRC + int atype; +#endif + + if (fscanf(input, FMT_NUM_ATTRS, length) == 1 + && *length > 0 + && (attributes = typecallocn(attr, (size_t) (*length + 1))) != 0) { + *attrs = attributes; + for (j = 0; j < *length; ++j) { + code = fscanf(input, FMT_ONE_ATTR, + &jcmp, + &atype, + name + ); + if (code == NUM_ONE_ATTR && (j == jcmp)) { + attributes[j].name = strdup(name); +#ifdef USE_PRETTYSRC + attributes[j].type = atype; +#endif + } else { + fprintf(stderr, "Did not find attributes\n"); + result = 0; + break; + } + } + if (*length > 1) + qsort(attributes, *length, sizeof(attributes[0]), compare_attr); + } + return result; +} + +static int load_flat_HTTag(FILE *input, unsigned nref, HTTag * tag, AttrType * allTypes) +{ + int result = 0; + unsigned ncmp = 0; + char name[1024]; + int code; + int j; + + code = fscanf(input, "%d:%s\n", &ncmp, name); + if (code == 2 && (nref == ncmp)) { + result = 1; + tag->name = strdup(name); +#ifdef USE_COLOR_STYLE + tag->name_len = strlen(tag->name); +#endif +#ifdef USE_JUSTIFY_ELTS + if (fscanf(input, "%s\n", name) == 1) { + tag->can_justify = !strcmp(name, "justify"); + } else { + fprintf(stderr, "Did not find can_justify\n"); + result = 0; + } +#endif + if (result) { + result = load_flat_AttrList(input, &(tag->attributes), &(tag->number_of_attributes)); + } + if (result) { + AttrType *myTypes; + int k, count; + char *next = get_line(input); + + if (next != 0 + && sscanf(next, "%d attr_types\n", &count) + && (myTypes = typecallocn(AttrType, (size_t) (count + 1))) + != 0) { + tag->attr_types = myTypes; + for (k = 0; k < count; ++k) { + next = get_line(input); + if (next != 0 + && sscanf(next, "%s\n", 
name)) { + for (j = 0; allTypes[j].name != 0; ++j) { + if (!strcmp(allTypes[j].name, name)) { + myTypes[k].name = strdup(name); + myTypes[k].list = allTypes[j].list; + break; + } + } + } else { + result = 0; + break; + } + } + if (result && count > 1) + qsort(myTypes, count, sizeof(myTypes[0]), compare_attr_types); + } + } + if (result) { + char *next = get_line(input); + + if (next != 0 + && sscanf(next, "\t\tcontents: %s\n", name)) { + tag->contents = s2SGMLContent(name); + free(next); + } else { + fprintf(stderr, "Did not find contents\n"); + result = 0; + } + } + if (result) { + result = load_flat_TagClass(input, "tagclass", &(tag->tagclass)); + } + if (result) { + result = load_flat_TagClass(input, "contains", &(tag->contains)); + } + if (result) { + result = load_flat_TagClass(input, "icontains", &(tag->icontains)); + } + if (result) { + result = load_flat_TagClass(input, "contained", &(tag->contained)); + } + if (result) { + result = load_flat_TagClass(input, "icontained", &(tag->icontained)); + } + if (result) { + result = load_flat_TagClass(input, "canclose", &(tag->canclose)); + } + if (result) { + result = load_flat_TagFlags(input, "flags", &(tag->flags)); + } + } else { + fprintf(stderr, "load_flat_HTTag error\n"); + } + return result; +} + +static int load_flat_AttrType(FILE *input, AttrType * types, size_t ncmp) +{ + int result = 0; + int ntst; + char name[1024]; + + if (fscanf(input, "%d:%s\n", &ntst, name) == 2 + && (ntst == (int) ncmp)) { + result = 1; + types->name = strdup(name); + if (!load_flat_AttrList(input, &(types->list), &ntst)) + result = 0; + } + return result; +} + +static SGML_dtd *load_flatfile(FILE *input) +{ + AttrType *attr_types = 0; + SGML_dtd *result = 0; + size_t n; + size_t number_of_attrs = 0; + size_t number_of_tags = 0; + HTTag *tag; + int code; + + code = fscanf(input, "%d attr_types\n", &number_of_attrs); + if (code + && number_of_attrs + && (attr_types = typecallocn(AttrType, number_of_attrs + 1)) != 0) { + for (n = 0; n 
< number_of_attrs; ++n) { + if (!load_flat_AttrType(input, attr_types + n, n)) { + break; + } + } + } + + code = fscanf(input, "%d tags\n", &number_of_tags); + if (code == 1) { + if ((result = typecalloc(SGML_dtd)) != 0 + && (result->tags = typecallocn(HTTag, (number_of_tags + 2))) != 0) { + for (n = 0; n < number_of_tags; ++n) { + if (load_flat_HTTag(input, n, &(result->tags[n]), attr_types)) { + result->number_of_tags = (n + 1); + } else { + break; + } + } + tag = 0; + for (n = 0; n < number_of_tags; ++n) { + if (result->tags[n].name != 0 + && !strcmp(result->tags[n].name, "OBJECT")) { + tag = result->tags + number_of_tags; + *tag = result->tags[n]; + tag->contents = SGML_MIXED; + tag->flags = Tgf_strict; + break; + } + } + if (tag == 0) { + fprintf(stderr, "Did not find OBJECT tag\n"); + result = 0; + } + } + } + return result; +} + +int main(int argc, char *argv[]) +{ + const SGML_dtd *the_dtd = &HTML_dtd; + int ch; + int dtd_version = 0; + int c_option = FALSE; + int h_option = FALSE; + int l_option = FALSE; + FILE *input = stdin; + FILE *output = stdout; + + while ((ch = getopt(argc, argv, GETOPT)) != -1) { + switch (ch) { + case 'c': + c_option = TRUE; + break; + case 'h': + h_option = TRUE; + break; + case 'l': + l_option = TRUE; + input = fopen(optarg, "r"); + if (input == 0) + failed(optarg); + break; + case 'o': + output = fopen(optarg, "w"); + if (output == 0) + failed(optarg); + break; + case 't': + dtd_version = 1; + break; + case 's': + dtd_version = 0; + break; + default: + usage(); + } + } + + HTSwitchDTD(dtd_version); + if (l_option) + the_dtd = load_flatfile(input); + + if (the_dtd != 0) { + if (c_option) + dump_source(output, the_dtd, dtd_version); + if (h_option) + dump_header(output, the_dtd); + if (!c_option && !h_option) + dump_flatfile(output, the_dtd); + } + + return EXIT_SUCCESS; +} diff --git a/WWW/Library/Implementation/hdr_HTMLDTD.h b/WWW/Library/Implementation/hdr_HTMLDTD.h new file mode 100644 index 00000000..9e4d47b0 --- /dev/null 
+++ b/WWW/Library/Implementation/hdr_HTMLDTD.h @@ -0,0 +1,980 @@ +/* $LynxId: hdr_HTMLDTD.h,v 1.22 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ +#ifndef hdr_HTMLDTD_H +#define hdr_HTMLDTD_H 1 + +#ifdef __cplusplus +extern "C" { +#endif +/* + + Element Numbers + + Must Match all tables by element! + These include tables in HTMLDTD.c + and code in HTML.c. + + */ + typedef enum { + HTML_A, + HTML_ABBR, + HTML_ACRONYM, + HTML_ADDRESS, + HTML_APPLET, + HTML_AREA, + HTML_AU, + HTML_AUTHOR, + HTML_B, + HTML_BANNER, + HTML_BASE, + HTML_BASEFONT, + HTML_BDO, + HTML_BGSOUND, + HTML_BIG, + HTML_BLINK, + HTML_BLOCKQUOTE, + HTML_BODY, + HTML_BODYTEXT, + HTML_BQ, + HTML_BR, + HTML_BUTTON, + HTML_CAPTION, + HTML_CENTER, + HTML_CITE, + HTML_CODE, + HTML_COL, + HTML_COLGROUP, + HTML_COMMENT, + HTML_CREDIT, + HTML_DD, + HTML_DEL, + HTML_DFN, + HTML_DIR, + HTML_DIV, + HTML_DL, + HTML_DLC, + HTML_DT, + HTML_EM, + HTML_EMBED, + HTML_FIELDSET, + HTML_FIG, + HTML_FN, + HTML_FONT, + HTML_FORM, + HTML_FRAME, + HTML_FRAMESET, + HTML_H1, + HTML_H2, + HTML_H3, + HTML_H4, + HTML_H5, + HTML_H6, + HTML_HEAD, + HTML_HR, + HTML_HTML, + HTML_HY, + HTML_I, + HTML_IFRAME, + HTML_IMG, + HTML_INPUT, + HTML_INS, + HTML_ISINDEX, + HTML_KBD, + HTML_KEYGEN, + HTML_LABEL, + HTML_LEGEND, + HTML_LH, + HTML_LI, + HTML_LINK, + HTML_LISTING, + HTML_MAP, + HTML_MARQUEE, + HTML_MATH, + HTML_MENU, + HTML_META, + HTML_NEXTID, + HTML_NOFRAMES, + HTML_NOTE, + HTML_OBJECT, + HTML_OL, + HTML_OPTION, + HTML_OVERLAY, + HTML_P, + HTML_PARAM, + HTML_PLAINTEXT, + HTML_PRE, + HTML_Q, + HTML_S, + HTML_SAMP, + HTML_SCRIPT, + HTML_SELECT, + HTML_SHY, + HTML_SMALL, + HTML_SPAN, + HTML_SPOT, + HTML_STRIKE, + HTML_STRONG, + HTML_STYLE, + HTML_SUB, + HTML_SUP, + HTML_TAB, + HTML_TABLE, + HTML_TBODY, + HTML_TD, + HTML_TEXTAREA, + HTML_TEXTFLOW, + HTML_TFOOT, + HTML_TH, + HTML_THEAD, + HTML_TITLE, + HTML_TR, + HTML_TT, + HTML_U, + HTML_UL, + HTML_VAR, + HTML_WBR, + HTML_XMP, + HTML_ALT_OBJECT + } HTMLElement; + +/* Notes: HTML.c uses a 
different extension of the + HTML_ELEMENTS space privately, see + HTNestedList.h. + + Do NOT replace HTML_ELEMENTS with + TABLESIZE(mumble_dtd.tags). + + Keep the following defines in synch with + the above enum! + */ + +/* # of elements generally visible to Lynx code */ +#define HTML_ELEMENTS 118 + +/* # of elements visible to SGML parser */ +#define HTML_ALL_ELEMENTS 119 + +/* + + Attribute numbers + + Identifier is HTML_<element>_<attribute>. + These must match the tables in HTML.c! + + */ +#define HTML_A_ACCESSKEY 0 +#define HTML_A_CHARSET 1 +#define HTML_A_CLASS 2 +#define HTML_A_CLEAR 3 +#define HTML_A_COORDS 4 +#define HTML_A_DIR 5 +#define HTML_A_HREF 6 +#define HTML_A_HREFLANG 7 +#define HTML_A_ID 8 +#define HTML_A_ISMAP 9 +#define HTML_A_LANG 10 +#define HTML_A_MD 11 +#define HTML_A_NAME 12 +#define HTML_A_NOTAB 13 +#define HTML_A_ONBLUR 14 +#define HTML_A_ONFOCUS 15 +#define HTML_A_REL 16 +#define HTML_A_REV 17 +#define HTML_A_SHAPE 18 +#define HTML_A_STYLE 19 +#define HTML_A_TABINDEX 20 +#define HTML_A_TARGET 21 +#define HTML_A_TITLE 22 +#define HTML_A_TYPE 23 +#define HTML_A_URN 24 +#define HTML_A_ATTRIBUTES 25 + +#define HTML_ADDRESS_CLASS 0 +#define HTML_ADDRESS_CLEAR 1 +#define HTML_ADDRESS_DIR 2 +#define HTML_ADDRESS_ID 3 +#define HTML_ADDRESS_LANG 4 +#define HTML_ADDRESS_NOWRAP 5 +#define HTML_ADDRESS_STYLE 6 +#define HTML_ADDRESS_TITLE 7 +#define HTML_ADDRESS_ATTRIBUTES 8 + +#define HTML_APPLET_ALIGN 0 +#define HTML_APPLET_ALT 1 +#define HTML_APPLET_CLASS 2 +#define HTML_APPLET_CLEAR 3 +#define HTML_APPLET_CODE 4 +#define HTML_APPLET_CODEBASE 5 +#define HTML_APPLET_DIR 6 +#define HTML_APPLET_DOWNLOAD 7 +#define HTML_APPLET_HEIGHT 8 +#define HTML_APPLET_HSPACE 9 +#define HTML_APPLET_ID 10 +#define HTML_APPLET_LANG 11 +#define HTML_APPLET_NAME 12 +#define HTML_APPLET_STYLE 13 +#define HTML_APPLET_TITLE 14 +#define HTML_APPLET_VSPACE 15 +#define HTML_APPLET_WIDTH 16 +#define HTML_APPLET_ATTRIBUTES 17 + +#define HTML_AREA_ACCESSKEY 0 +#define 
HTML_AREA_ALT 1 +#define HTML_AREA_CLASS 2 +#define HTML_AREA_CLEAR 3 +#define HTML_AREA_COORDS 4 +#define HTML_AREA_DIR 5 +#define HTML_AREA_HREF 6 +#define HTML_AREA_ID 7 +#define HTML_AREA_LANG 8 +#define HTML_AREA_NOHREF 9 +#define HTML_AREA_NOTAB 10 +#define HTML_AREA_ONBLUR 11 +#define HTML_AREA_ONFOCUS 12 +#define HTML_AREA_SHAPE 13 +#define HTML_AREA_STYLE 14 +#define HTML_AREA_TABINDEX 15 +#define HTML_AREA_TARGET 16 +#define HTML_AREA_TITLE 17 +#define HTML_AREA_ATTRIBUTES 18 + +#define HTML_BASE_CLASS 0 +#define HTML_BASE_HREF 1 +#define HTML_BASE_ID 2 +#define HTML_BASE_STYLE 3 +#define HTML_BASE_TARGET 4 +#define HTML_BASE_TITLE 5 +#define HTML_BASE_ATTRIBUTES 6 + +#define HTML_BGSOUND_CLASS 0 +#define HTML_BGSOUND_CLEAR 1 +#define HTML_BGSOUND_DIR 2 +#define HTML_BGSOUND_ID 3 +#define HTML_BGSOUND_LANG 4 +#define HTML_BGSOUND_LOOP 5 +#define HTML_BGSOUND_SRC 6 +#define HTML_BGSOUND_STYLE 7 +#define HTML_BGSOUND_TITLE 8 +#define HTML_BGSOUND_ATTRIBUTES 9 + +#define HTML_BODY_ALINK 0 +#define HTML_BODY_BACKGROUND 1 +#define HTML_BODY_BGCOLOR 2 +#define HTML_BODY_CLASS 3 +#define HTML_BODY_CLEAR 4 +#define HTML_BODY_DIR 5 +#define HTML_BODY_ID 6 +#define HTML_BODY_LANG 7 +#define HTML_BODY_LINK 8 +#define HTML_BODY_ONLOAD 9 +#define HTML_BODY_ONUNLOAD 10 +#define HTML_BODY_STYLE 11 +#define HTML_BODY_TEXT 12 +#define HTML_BODY_TITLE 13 +#define HTML_BODY_VLINK 14 +#define HTML_BODY_ATTRIBUTES 15 + +#define HTML_BODYTEXT_CLASS 0 +#define HTML_BODYTEXT_CLEAR 1 +#define HTML_BODYTEXT_DATA 2 +#define HTML_BODYTEXT_DIR 3 +#define HTML_BODYTEXT_ID 4 +#define HTML_BODYTEXT_LANG 5 +#define HTML_BODYTEXT_NAME 6 +#define HTML_BODYTEXT_OBJECT 7 +#define HTML_BODYTEXT_REF 8 +#define HTML_BODYTEXT_STYLE 9 +#define HTML_BODYTEXT_TITLE 10 +#define HTML_BODYTEXT_TYPE 11 +#define HTML_BODYTEXT_VALUE 12 +#define HTML_BODYTEXT_VALUETYPE 13 +#define HTML_BODYTEXT_ATTRIBUTES 14 + +#define HTML_BQ_CITE 0 +#define HTML_BQ_CLASS 1 +#define HTML_BQ_CLEAR 2 +#define HTML_BQ_DIR 3 
+#define HTML_BQ_ID 4 +#define HTML_BQ_LANG 5 +#define HTML_BQ_NOWRAP 6 +#define HTML_BQ_STYLE 7 +#define HTML_BQ_TITLE 8 +#define HTML_BQ_ATTRIBUTES 9 + +#define HTML_BUTTON_ACCESSKEY 0 +#define HTML_BUTTON_CLASS 1 +#define HTML_BUTTON_CLEAR 2 +#define HTML_BUTTON_DIR 3 +#define HTML_BUTTON_DISABLED 4 +#define HTML_BUTTON_ID 5 +#define HTML_BUTTON_LANG 6 +#define HTML_BUTTON_NAME 7 +#define HTML_BUTTON_ONBLUR 8 +#define HTML_BUTTON_ONFOCUS 9 +#define HTML_BUTTON_READONLY 10 +#define HTML_BUTTON_STYLE 11 +#define HTML_BUTTON_TABINDEX 12 +#define HTML_BUTTON_TITLE 13 +#define HTML_BUTTON_TYPE 14 +#define HTML_BUTTON_VALUE 15 +#define HTML_BUTTON_ATTRIBUTES 16 + +#define HTML_CAPTION_ACCESSKEY 0 +#define HTML_CAPTION_ALIGN 1 +#define HTML_CAPTION_CLASS 2 +#define HTML_CAPTION_CLEAR 3 +#define HTML_CAPTION_DIR 4 +#define HTML_CAPTION_ID 5 +#define HTML_CAPTION_LANG 6 +#define HTML_CAPTION_STYLE 7 +#define HTML_CAPTION_TITLE 8 +#define HTML_CAPTION_ATTRIBUTES 9 + +#define HTML_COL_ALIGN 0 +#define HTML_COL_CHAR 1 +#define HTML_COL_CHAROFF 2 +#define HTML_COL_CLASS 3 +#define HTML_COL_CLEAR 4 +#define HTML_COL_DIR 5 +#define HTML_COL_ID 6 +#define HTML_COL_LANG 7 +#define HTML_COL_SPAN 8 +#define HTML_COL_STYLE 9 +#define HTML_COL_TITLE 10 +#define HTML_COL_VALIGN 11 +#define HTML_COL_WIDTH 12 +#define HTML_COL_ATTRIBUTES 13 + +#define HTML_DEL_CITE 0 +#define HTML_DEL_CLASS 1 +#define HTML_DEL_DATETIME 2 +#define HTML_DEL_DIR 3 +#define HTML_DEL_ID 4 +#define HTML_DEL_LANG 5 +#define HTML_DEL_STYLE 6 +#define HTML_DEL_TITLE 7 +#define HTML_DEL_ATTRIBUTES 8 + +#define HTML_DIV_ALIGN 0 +#define HTML_DIV_CLASS 1 +#define HTML_DIV_CLEAR 2 +#define HTML_DIV_DIR 3 +#define HTML_DIV_ID 4 +#define HTML_DIV_LANG 5 +#define HTML_DIV_STYLE 6 +#define HTML_DIV_TITLE 7 +#define HTML_DIV_ATTRIBUTES 8 + +#define HTML_DL_CLASS 0 +#define HTML_DL_CLEAR 1 +#define HTML_DL_COMPACT 2 +#define HTML_DL_DIR 3 +#define HTML_DL_ID 4 +#define HTML_DL_LANG 5 +#define HTML_DL_STYLE 6 +#define 
HTML_DL_TITLE 7 +#define HTML_DL_ATTRIBUTES 8 + +#define HTML_EMBED_ALIGN 0 +#define HTML_EMBED_ALT 1 +#define HTML_EMBED_BORDER 2 +#define HTML_EMBED_CLASS 3 +#define HTML_EMBED_CLEAR 4 +#define HTML_EMBED_DIR 5 +#define HTML_EMBED_HEIGHT 6 +#define HTML_EMBED_ID 7 +#define HTML_EMBED_IMAGEMAP 8 +#define HTML_EMBED_ISMAP 9 +#define HTML_EMBED_LANG 10 +#define HTML_EMBED_MD 11 +#define HTML_EMBED_NAME 12 +#define HTML_EMBED_NOFLOW 13 +#define HTML_EMBED_PARAMS 14 +#define HTML_EMBED_SRC 15 +#define HTML_EMBED_STYLE 16 +#define HTML_EMBED_TITLE 17 +#define HTML_EMBED_UNITS 18 +#define HTML_EMBED_USEMAP 19 +#define HTML_EMBED_WIDTH 20 +#define HTML_EMBED_ATTRIBUTES 21 + +#define HTML_FIG_ALIGN 0 +#define HTML_FIG_BORDER 1 +#define HTML_FIG_CLASS 2 +#define HTML_FIG_CLEAR 3 +#define HTML_FIG_DIR 4 +#define HTML_FIG_HEIGHT 5 +#define HTML_FIG_ID 6 +#define HTML_FIG_IMAGEMAP 7 +#define HTML_FIG_ISOBJECT 8 +#define HTML_FIG_LANG 9 +#define HTML_FIG_MD 10 +#define HTML_FIG_NOFLOW 11 +#define HTML_FIG_SRC 12 +#define HTML_FIG_STYLE 13 +#define HTML_FIG_TITLE 14 +#define HTML_FIG_UNITS 15 +#define HTML_FIG_WIDTH 16 +#define HTML_FIG_ATTRIBUTES 17 + +#define HTML_FONT_CLASS 0 +#define HTML_FONT_CLEAR 1 +#define HTML_FONT_COLOR 2 +#define HTML_FONT_DIR 3 +#define HTML_FONT_END 4 +#define HTML_FONT_FACE 5 +#define HTML_FONT_ID 6 +#define HTML_FONT_LANG 7 +#define HTML_FONT_SIZE 8 +#define HTML_FONT_STYLE 9 +#define HTML_FONT_TITLE 10 +#define HTML_FONT_ATTRIBUTES 11 + +#define HTML_FORM_ACCEPT 0 +#define HTML_FORM_ACCEPT_CHARSET 1 +#define HTML_FORM_ACTION 2 +#define HTML_FORM_CLASS 3 +#define HTML_FORM_CLEAR 4 +#define HTML_FORM_DIR 5 +#define HTML_FORM_ENCTYPE 6 +#define HTML_FORM_ID 7 +#define HTML_FORM_LANG 8 +#define HTML_FORM_METHOD 9 +#define HTML_FORM_ONRESET 10 +#define HTML_FORM_ONSUBMIT 11 +#define HTML_FORM_SCRIPT 12 +#define HTML_FORM_STYLE 13 +#define HTML_FORM_SUBJECT 14 +#define HTML_FORM_TARGET 15 +#define HTML_FORM_TITLE 16 +#define HTML_FORM_ATTRIBUTES 17 + 
+#define HTML_FRAME_CLASS 0 +#define HTML_FRAME_FRAMEBORDER 1 +#define HTML_FRAME_ID 2 +#define HTML_FRAME_LONGDESC 3 +#define HTML_FRAME_MARGINHEIGHT 4 +#define HTML_FRAME_MARGINWIDTH 5 +#define HTML_FRAME_NAME 6 +#define HTML_FRAME_NORESIZE 7 +#define HTML_FRAME_SCROLLING 8 +#define HTML_FRAME_SRC 9 +#define HTML_FRAME_STYLE 10 +#define HTML_FRAME_TITLE 11 +#define HTML_FRAME_ATTRIBUTES 12 + +#define HTML_FRAMESET_COLS 0 +#define HTML_FRAMESET_ONLOAD 1 +#define HTML_FRAMESET_ONUNLOAD 2 +#define HTML_FRAMESET_ROWS 3 +#define HTML_FRAMESET_ATTRIBUTES 4 + +#define HTML_GEN_CLASS 0 +#define HTML_GEN_CLEAR 1 +#define HTML_GEN_DIR 2 +#define HTML_GEN_ID 3 +#define HTML_GEN_LANG 4 +#define HTML_GEN_STYLE 5 +#define HTML_GEN_TITLE 6 +#define HTML_GEN_ATTRIBUTES 7 + +#define HTML_H_ALIGN 0 +#define HTML_H_CLASS 1 +#define HTML_H_CLEAR 2 +#define HTML_H_DINGBAT 3 +#define HTML_H_DIR 4 +#define HTML_H_ID 5 +#define HTML_H_LANG 6 +#define HTML_H_MD 7 +#define HTML_H_NOWRAP 8 +#define HTML_H_SEQNUM 9 +#define HTML_H_SKIP 10 +#define HTML_H_SRC 11 +#define HTML_H_STYLE 12 +#define HTML_H_TITLE 13 +#define HTML_H_ATTRIBUTES 14 + +#define HTML_HR_ALIGN 0 +#define HTML_HR_CLASS 1 +#define HTML_HR_CLEAR 2 +#define HTML_HR_DIR 3 +#define HTML_HR_ID 4 +#define HTML_HR_LANG 5 +#define HTML_HR_MD 6 +#define HTML_HR_NOSHADE 7 +#define HTML_HR_SIZE 8 +#define HTML_HR_SRC 9 +#define HTML_HR_STYLE 10 +#define HTML_HR_TITLE 11 +#define HTML_HR_WIDTH 12 +#define HTML_HR_ATTRIBUTES 13 + +#define HTML_IFRAME_ALIGN 0 +#define HTML_IFRAME_CLASS 1 +#define HTML_IFRAME_FRAMEBORDER 2 +#define HTML_IFRAME_HEIGHT 3 +#define HTML_IFRAME_ID 4 +#define HTML_IFRAME_LONGDESC 5 +#define HTML_IFRAME_MARGINHEIGHT 6 +#define HTML_IFRAME_MARGINWIDTH 7 +#define HTML_IFRAME_NAME 8 +#define HTML_IFRAME_SCROLLING 9 +#define HTML_IFRAME_SRC 10 +#define HTML_IFRAME_STYLE 11 +#define HTML_IFRAME_TITLE 12 +#define HTML_IFRAME_WIDTH 13 +#define HTML_IFRAME_ATTRIBUTES 14 + +#define HTML_IMG_ALIGN 0 +#define 
HTML_IMG_ALT 1 +#define HTML_IMG_BORDER 2 +#define HTML_IMG_CLASS 3 +#define HTML_IMG_CLEAR 4 +#define HTML_IMG_DIR 5 +#define HTML_IMG_HEIGHT 6 +#define HTML_IMG_HSPACE 7 +#define HTML_IMG_ID 8 +#define HTML_IMG_ISMAP 9 +#define HTML_IMG_ISOBJECT 10 +#define HTML_IMG_LANG 11 +#define HTML_IMG_LONGDESC 12 +#define HTML_IMG_MD 13 +#define HTML_IMG_NAME 14 +#define HTML_IMG_SRC 15 +#define HTML_IMG_STYLE 16 +#define HTML_IMG_TITLE 17 +#define HTML_IMG_UNITS 18 +#define HTML_IMG_USEMAP 19 +#define HTML_IMG_VSPACE 20 +#define HTML_IMG_WIDTH 21 +#define HTML_IMG_ATTRIBUTES 22 + +#define HTML_INPUT_ACCEPT 0 +#define HTML_INPUT_ACCEPT_CHARSET 1 +#define HTML_INPUT_ACCESSKEY 2 +#define HTML_INPUT_ALIGN 3 +#define HTML_INPUT_ALT 4 +#define HTML_INPUT_CHECKED 5 +#define HTML_INPUT_CLASS 6 +#define HTML_INPUT_CLEAR 7 +#define HTML_INPUT_DIR 8 +#define HTML_INPUT_DISABLED 9 +#define HTML_INPUT_ERROR 10 +#define HTML_INPUT_HEIGHT 11 +#define HTML_INPUT_ID 12 +#define HTML_INPUT_ISMAP 13 +#define HTML_INPUT_LANG 14 +#define HTML_INPUT_MAX 15 +#define HTML_INPUT_MAXLENGTH 16 +#define HTML_INPUT_MD 17 +#define HTML_INPUT_MIN 18 +#define HTML_INPUT_NAME 19 +#define HTML_INPUT_NOTAB 20 +#define HTML_INPUT_ONBLUR 21 +#define HTML_INPUT_ONCHANGE 22 +#define HTML_INPUT_ONFOCUS 23 +#define HTML_INPUT_ONSELECT 24 +#define HTML_INPUT_READONLY 25 +#define HTML_INPUT_SIZE 26 +#define HTML_INPUT_SRC 27 +#define HTML_INPUT_STYLE 28 +#define HTML_INPUT_TABINDEX 29 +#define HTML_INPUT_TITLE 30 +#define HTML_INPUT_TYPE 31 +#define HTML_INPUT_USEMAP 32 +#define HTML_INPUT_VALUE 33 +#define HTML_INPUT_WIDTH 34 +#define HTML_INPUT_ATTRIBUTES 35 + +#define HTML_ISINDEX_ACTION 0 +#define HTML_ISINDEX_CLASS 1 +#define HTML_ISINDEX_DIR 2 +#define HTML_ISINDEX_HREF 3 +#define HTML_ISINDEX_ID 4 +#define HTML_ISINDEX_LANG 5 +#define HTML_ISINDEX_PROMPT 6 +#define HTML_ISINDEX_STYLE 7 +#define HTML_ISINDEX_TITLE 8 +#define HTML_ISINDEX_ATTRIBUTES 9 + +#define HTML_KEYGEN_CHALLENGE 0 +#define 
HTML_KEYGEN_CLASS 1 +#define HTML_KEYGEN_DIR 2 +#define HTML_KEYGEN_ID 3 +#define HTML_KEYGEN_LANG 4 +#define HTML_KEYGEN_NAME 5 +#define HTML_KEYGEN_STYLE 6 +#define HTML_KEYGEN_TITLE 7 +#define HTML_KEYGEN_ATTRIBUTES 8 + +#define HTML_LABEL_ACCESSKEY 0 +#define HTML_LABEL_CLASS 1 +#define HTML_LABEL_CLEAR 2 +#define HTML_LABEL_DIR 3 +#define HTML_LABEL_FOR 4 +#define HTML_LABEL_ID 5 +#define HTML_LABEL_LANG 6 +#define HTML_LABEL_ONBLUR 7 +#define HTML_LABEL_ONFOCUS 8 +#define HTML_LABEL_STYLE 9 +#define HTML_LABEL_TITLE 10 +#define HTML_LABEL_ATTRIBUTES 11 + +#define HTML_LI_CLASS 0 +#define HTML_LI_CLEAR 1 +#define HTML_LI_DINGBAT 2 +#define HTML_LI_DIR 3 +#define HTML_LI_ID 4 +#define HTML_LI_LANG 5 +#define HTML_LI_MD 6 +#define HTML_LI_SKIP 7 +#define HTML_LI_SRC 8 +#define HTML_LI_STYLE 9 +#define HTML_LI_TITLE 10 +#define HTML_LI_TYPE 11 +#define HTML_LI_VALUE 12 +#define HTML_LI_ATTRIBUTES 13 + +#define HTML_LINK_CHARSET 0 +#define HTML_LINK_CLASS 1 +#define HTML_LINK_DIR 2 +#define HTML_LINK_HREF 3 +#define HTML_LINK_HREFLANG 4 +#define HTML_LINK_ID 5 +#define HTML_LINK_LANG 6 +#define HTML_LINK_MEDIA 7 +#define HTML_LINK_REL 8 +#define HTML_LINK_REV 9 +#define HTML_LINK_STYLE 10 +#define HTML_LINK_TARGET 11 +#define HTML_LINK_TITLE 12 +#define HTML_LINK_TYPE 13 +#define HTML_LINK_ATTRIBUTES 14 + +#define HTML_MAP_CLASS 0 +#define HTML_MAP_CLEAR 1 +#define HTML_MAP_DIR 2 +#define HTML_MAP_ID 3 +#define HTML_MAP_LANG 4 +#define HTML_MAP_NAME 5 +#define HTML_MAP_STYLE 6 +#define HTML_MAP_TITLE 7 +#define HTML_MAP_ATTRIBUTES 8 + +#define HTML_MATH_BOX 0 +#define HTML_MATH_CLASS 1 +#define HTML_MATH_CLEAR 2 +#define HTML_MATH_DIR 3 +#define HTML_MATH_ID 4 +#define HTML_MATH_LANG 5 +#define HTML_MATH_STYLE 6 +#define HTML_MATH_TITLE 7 +#define HTML_MATH_ATTRIBUTES 8 + +#define HTML_META_CHARSET 0 +#define HTML_META_CONTENT 1 +#define HTML_META_HTTP_EQUIV 2 +#define HTML_META_NAME 3 +#define HTML_META_SCHEME 4 +#define HTML_META_ATTRIBUTES 5 + +#define 
HTML_NEXTID_N 0 +#define HTML_NEXTID_ATTRIBUTES 1 + +#define HTML_NOTE_CLASS 0 +#define HTML_NOTE_CLEAR 1 +#define HTML_NOTE_DIR 2 +#define HTML_NOTE_ID 3 +#define HTML_NOTE_LANG 4 +#define HTML_NOTE_MD 5 +#define HTML_NOTE_ROLE 6 +#define HTML_NOTE_SRC 7 +#define HTML_NOTE_STYLE 8 +#define HTML_NOTE_TITLE 9 +#define HTML_NOTE_ATTRIBUTES 10 + +#define HTML_OBJECT_ALIGN 0 +#define HTML_OBJECT_ARCHIVE 1 +#define HTML_OBJECT_BORDER 2 +#define HTML_OBJECT_CLASS 3 +#define HTML_OBJECT_CLASSID 4 +#define HTML_OBJECT_CODEBASE 5 +#define HTML_OBJECT_CODETYPE 6 +#define HTML_OBJECT_DATA 7 +#define HTML_OBJECT_DECLARE 8 +#define HTML_OBJECT_DIR 9 +#define HTML_OBJECT_HEIGHT 10 +#define HTML_OBJECT_HSPACE 11 +#define HTML_OBJECT_ID 12 +#define HTML_OBJECT_ISMAP 13 +#define HTML_OBJECT_LANG 14 +#define HTML_OBJECT_NAME 15 +#define HTML_OBJECT_NOTAB 16 +#define HTML_OBJECT_SHAPES 17 +#define HTML_OBJECT_STANDBY 18 +#define HTML_OBJECT_STYLE 19 +#define HTML_OBJECT_TABINDEX 20 +#define HTML_OBJECT_TITLE 21 +#define HTML_OBJECT_TYPE 22 +#define HTML_OBJECT_USEMAP 23 +#define HTML_OBJECT_VSPACE 24 +#define HTML_OBJECT_WIDTH 25 +#define HTML_OBJECT_ATTRIBUTES 26 + +#define HTML_OL_CLASS 0 +#define HTML_OL_CLEAR 1 +#define HTML_OL_COMPACT 2 +#define HTML_OL_CONTINUE 3 +#define HTML_OL_DIR 4 +#define HTML_OL_ID 5 +#define HTML_OL_LANG 6 +#define HTML_OL_SEQNUM 7 +#define HTML_OL_START 8 +#define HTML_OL_STYLE 9 +#define HTML_OL_TITLE 10 +#define HTML_OL_TYPE 11 +#define HTML_OL_ATTRIBUTES 12 + +#define HTML_OPTION_CLASS 0 +#define HTML_OPTION_CLEAR 1 +#define HTML_OPTION_DIR 2 +#define HTML_OPTION_DISABLED 3 +#define HTML_OPTION_ERROR 4 +#define HTML_OPTION_ID 5 +#define HTML_OPTION_LABEL 6 +#define HTML_OPTION_LANG 7 +#define HTML_OPTION_SELECTED 8 +#define HTML_OPTION_SHAPE 9 +#define HTML_OPTION_STYLE 10 +#define HTML_OPTION_TITLE 11 +#define HTML_OPTION_VALUE 12 +#define HTML_OPTION_ATTRIBUTES 13 + +#define HTML_OVERLAY_CLASS 0 +#define HTML_OVERLAY_HEIGHT 1 +#define 
HTML_OVERLAY_ID 2 +#define HTML_OVERLAY_IMAGEMAP 3 +#define HTML_OVERLAY_MD 4 +#define HTML_OVERLAY_SRC 5 +#define HTML_OVERLAY_STYLE 6 +#define HTML_OVERLAY_TITLE 7 +#define HTML_OVERLAY_UNITS 8 +#define HTML_OVERLAY_WIDTH 9 +#define HTML_OVERLAY_X 10 +#define HTML_OVERLAY_Y 11 +#define HTML_OVERLAY_ATTRIBUTES 12 + +#define HTML_P_ALIGN 0 +#define HTML_P_CLASS 1 +#define HTML_P_CLEAR 2 +#define HTML_P_DIR 3 +#define HTML_P_ID 4 +#define HTML_P_LANG 5 +#define HTML_P_NOWRAP 6 +#define HTML_P_STYLE 7 +#define HTML_P_TITLE 8 +#define HTML_P_ATTRIBUTES 9 + +#define HTML_PARAM_ACCEPT 0 +#define HTML_PARAM_ACCEPT_CHARSET 1 +#define HTML_PARAM_ACCEPT_ENCODING 2 +#define HTML_PARAM_CLASS 3 +#define HTML_PARAM_CLEAR 4 +#define HTML_PARAM_DATA 5 +#define HTML_PARAM_DIR 6 +#define HTML_PARAM_ID 7 +#define HTML_PARAM_LANG 8 +#define HTML_PARAM_NAME 9 +#define HTML_PARAM_OBJECT 10 +#define HTML_PARAM_REF 11 +#define HTML_PARAM_STYLE 12 +#define HTML_PARAM_TITLE 13 +#define HTML_PARAM_TYPE 14 +#define HTML_PARAM_VALUE 15 +#define HTML_PARAM_VALUEREF 16 +#define HTML_PARAM_VALUETYPE 17 +#define HTML_PARAM_ATTRIBUTES 18 + +#define HTML_Q_CITE 0 +#define HTML_Q_CLASS 1 +#define HTML_Q_CLEAR 2 +#define HTML_Q_DIR 3 +#define HTML_Q_ID 4 +#define HTML_Q_LANG 5 +#define HTML_Q_STYLE 6 +#define HTML_Q_TITLE 7 +#define HTML_Q_ATTRIBUTES 8 + +#define HTML_SCRIPT_CHARSET 0 +#define HTML_SCRIPT_CLASS 1 +#define HTML_SCRIPT_CLEAR 2 +#define HTML_SCRIPT_DEFER 3 +#define HTML_SCRIPT_DIR 4 +#define HTML_SCRIPT_EVENT 5 +#define HTML_SCRIPT_FOR 6 +#define HTML_SCRIPT_ID 7 +#define HTML_SCRIPT_LANG 8 +#define HTML_SCRIPT_LANGUAGE 9 +#define HTML_SCRIPT_NAME 10 +#define HTML_SCRIPT_SCRIPTENGINE 11 +#define HTML_SCRIPT_SRC 12 +#define HTML_SCRIPT_STYLE 13 +#define HTML_SCRIPT_TITLE 14 +#define HTML_SCRIPT_TYPE 15 +#define HTML_SCRIPT_ATTRIBUTES 16 + +#define HTML_SELECT_ALIGN 0 +#define HTML_SELECT_CLASS 1 +#define HTML_SELECT_CLEAR 2 +#define HTML_SELECT_DIR 3 +#define HTML_SELECT_DISABLED 4 
+#define HTML_SELECT_ERROR 5 +#define HTML_SELECT_HEIGHT 6 +#define HTML_SELECT_ID 7 +#define HTML_SELECT_LANG 8 +#define HTML_SELECT_MD 9 +#define HTML_SELECT_MULTIPLE 10 +#define HTML_SELECT_NAME 11 +#define HTML_SELECT_NOTAB 12 +#define HTML_SELECT_ONBLUR 13 +#define HTML_SELECT_ONCHANGE 14 +#define HTML_SELECT_ONFOCUS 15 +#define HTML_SELECT_SIZE 16 +#define HTML_SELECT_STYLE 17 +#define HTML_SELECT_TABINDEX 18 +#define HTML_SELECT_TITLE 19 +#define HTML_SELECT_UNITS 20 +#define HTML_SELECT_WIDTH 21 +#define HTML_SELECT_ATTRIBUTES 22 + +#define HTML_STYLE_CLASS 0 +#define HTML_STYLE_DIR 1 +#define HTML_STYLE_ID 2 +#define HTML_STYLE_LANG 3 +#define HTML_STYLE_MEDIA 4 +#define HTML_STYLE_NOTATION 5 +#define HTML_STYLE_STYLE 6 +#define HTML_STYLE_TITLE 7 +#define HTML_STYLE_TYPE 8 +#define HTML_STYLE_ATTRIBUTES 9 + +#define HTML_TAB_ALIGN 0 +#define HTML_TAB_CLASS 1 +#define HTML_TAB_CLEAR 2 +#define HTML_TAB_DIR 3 +#define HTML_TAB_DP 4 +#define HTML_TAB_ID 5 +#define HTML_TAB_INDENT 6 +#define HTML_TAB_LANG 7 +#define HTML_TAB_STYLE 8 +#define HTML_TAB_TITLE 9 +#define HTML_TAB_TO 10 +#define HTML_TAB_ATTRIBUTES 11 + +#define HTML_TABLE_ALIGN 0 +#define HTML_TABLE_BACKGROUND 1 +#define HTML_TABLE_BORDER 2 +#define HTML_TABLE_CELLPADDING 3 +#define HTML_TABLE_CELLSPACING 4 +#define HTML_TABLE_CLASS 5 +#define HTML_TABLE_CLEAR 6 +#define HTML_TABLE_COLS 7 +#define HTML_TABLE_COLSPEC 8 +#define HTML_TABLE_DIR 9 +#define HTML_TABLE_DP 10 +#define HTML_TABLE_FRAME 11 +#define HTML_TABLE_ID 12 +#define HTML_TABLE_LANG 13 +#define HTML_TABLE_NOFLOW 14 +#define HTML_TABLE_NOWRAP 15 +#define HTML_TABLE_RULES 16 +#define HTML_TABLE_STYLE 17 +#define HTML_TABLE_SUMMARY 18 +#define HTML_TABLE_TITLE 19 +#define HTML_TABLE_UNITS 20 +#define HTML_TABLE_WIDTH 21 +#define HTML_TABLE_ATTRIBUTES 22 + +#define HTML_TD_ABBR 0 +#define HTML_TD_ALIGN 1 +#define HTML_TD_AXES 2 +#define HTML_TD_AXIS 3 +#define HTML_TD_BACKGROUND 4 +#define HTML_TD_CHAR 5 +#define HTML_TD_CHAROFF 6 
+#define HTML_TD_CLASS 7 +#define HTML_TD_CLEAR 8 +#define HTML_TD_COLSPAN 9 +#define HTML_TD_DIR 10 +#define HTML_TD_DP 11 +#define HTML_TD_HEADERS 12 +#define HTML_TD_HEIGHT 13 +#define HTML_TD_ID 14 +#define HTML_TD_LANG 15 +#define HTML_TD_NOWRAP 16 +#define HTML_TD_ROWSPAN 17 +#define HTML_TD_SCOPE 18 +#define HTML_TD_STYLE 19 +#define HTML_TD_TITLE 20 +#define HTML_TD_VALIGN 21 +#define HTML_TD_WIDTH 22 +#define HTML_TD_ATTRIBUTES 23 + +#define HTML_TEXTAREA_ACCEPT_CHARSET 0 +#define HTML_TEXTAREA_ACCESSKEY 1 +#define HTML_TEXTAREA_ALIGN 2 +#define HTML_TEXTAREA_CLASS 3 +#define HTML_TEXTAREA_CLEAR 4 +#define HTML_TEXTAREA_COLS 5 +#define HTML_TEXTAREA_DIR 6 +#define HTML_TEXTAREA_DISABLED 7 +#define HTML_TEXTAREA_ERROR 8 +#define HTML_TEXTAREA_ID 9 +#define HTML_TEXTAREA_LANG 10 +#define HTML_TEXTAREA_NAME 11 +#define HTML_TEXTAREA_NOTAB 12 +#define HTML_TEXTAREA_ONBLUR 13 +#define HTML_TEXTAREA_ONCHANGE 14 +#define HTML_TEXTAREA_ONFOCUS 15 +#define HTML_TEXTAREA_ONSELECT 16 +#define HTML_TEXTAREA_READONLY 17 +#define HTML_TEXTAREA_ROWS 18 +#define HTML_TEXTAREA_STYLE 19 +#define HTML_TEXTAREA_TABINDEX 20 +#define HTML_TEXTAREA_TITLE 21 +#define HTML_TEXTAREA_ATTRIBUTES 22 + +#define HTML_TR_ALIGN 0 +#define HTML_TR_CHAR 1 +#define HTML_TR_CHAROFF 2 +#define HTML_TR_CLASS 3 +#define HTML_TR_CLEAR 4 +#define HTML_TR_DIR 5 +#define HTML_TR_DP 6 +#define HTML_TR_ID 7 +#define HTML_TR_LANG 8 +#define HTML_TR_NOWRAP 9 +#define HTML_TR_STYLE 10 +#define HTML_TR_TITLE 11 +#define HTML_TR_VALIGN 12 +#define HTML_TR_ATTRIBUTES 13 + +#define HTML_UL_CLASS 0 +#define HTML_UL_CLEAR 1 +#define HTML_UL_COMPACT 2 +#define HTML_UL_DINGBAT 3 +#define HTML_UL_DIR 4 +#define HTML_UL_ID 5 +#define HTML_UL_LANG 6 +#define HTML_UL_MD 7 +#define HTML_UL_PLAIN 8 +#define HTML_UL_SRC 9 +#define HTML_UL_STYLE 10 +#define HTML_UL_TITLE 11 +#define HTML_UL_TYPE 12 +#define HTML_UL_WRAP 13 +#define HTML_UL_ATTRIBUTES 14 + +#ifdef __cplusplus +} +#endif +#endif /* hdr_HTMLDTD_H */ diff 
--git a/WWW/Library/Implementation/makefile.in b/WWW/Library/Implementation/makefile.in new file mode 100644 index 00000000..1b7118f9 --- /dev/null +++ b/WWW/Library/Implementation/makefile.in @@ -0,0 +1,378 @@ +# $LynxId: makefile.in,v 1.30 2010/09/19 19:29:35 tom Exp $ +# Make WWW under unix for a.n.other unix system (bsd) +# Use this as a template + +# For W3 distribution, machine type for subdirectories +WWW_MACH = Implementation +WWWINC = $(top_srcdir)/WWW/Library/Implementation + +ECHO = @DONT_ECHO_CC@ +LFLAGS = + +prefix = @prefix@ +exec_prefix = @exec_prefix@ +top_srcdir = @top_srcdir@ +srcdir = @srcdir@ +VPATH = $(srcdir) + +LYFLAGS = # FIXME: set in parent makefile + +CC = @CC@ +DEFS = @DEFS@ +EXTRA_CPPFLAGS = @EXTRA_CPPFLAGS@ +CPPFLAGS = @CPPFLAGS@ + +AR = @AR@ +ARFLAGS = @ARFLAGS@ + +RANLIB = @RANLIB@ + +o = .@OBJEXT@ +x = @EXEEXT@ + +INTLDIR_CPPFLAGS= @INTLDIR_CPPFLAGS@ -I$(top_srcdir)/intl + +CPP = @CPP@ +CPPOPTS = $(DEFS) $(LYFLAGS) \ + -I../../.. \ + -I../../../src \ + -I$(top_srcdir) \ + -I$(top_srcdir)/src \ + $(INTLDIR_CPPFLAGS) -I$(WWWINC) $(EXTRA_CPPFLAGS) $(CPPFLAGS) + +LY_CFLAGS = @CFLAGS@ +CFLAGS = $(CPPOPTS) $(LY_CFLAGS) + +LINT = @LINT@ +LINTOPTS = + +CTAGS = @CTAGS@ + +# Directory for installed binary: +BINDIR = @bindir@ + +# Where is the W3 object library to be installed (not normally done)? +#_________________ OK if normal W3 distribution +# Where is the WWW source root? +WWW = $(top_srcdir)/WWW + +# Where should temporary (object) files go? +WTMP = ../.. + +# (Version.make) +VC = 2.14 +#______________________________________________________________________ +# (originally CommonMakefile) + +# If this env var is set to something else Some makes will use that instead +SHELL = @CONFIG_SHELL@ + +# .h files are distributed but originally are made from the +# self-documenting hypertext files. 
+ +.SUFFIXES: .h .html +.html.h: +# - chmod +w $*.h + www -w90 -na -to text/x-c $*.html > $*.h +# chmod -w $*.h + +# If this is actually run in a subdirectory, +# +# WWW = ../../.. +# WWW = ../.. For [cernlib] build in this directory + +CMN = $(WWW)/Library/Implementation/ + +# Where shall we put the objects and built library? + +LOB = . + +# Only needed if HTWAIS.c is to be compiled. Put into your Makefile.include +# uncomment these and fill in WAISINC for adding direct wais access +# to Lynx. +@MAKE_WAIS@HTWAIS_c = $(CMN)/HTWAIS.c +@MAKE_WAIS@HTWAIS_o = $(LOB)/HTWAIS$o +@MAKE_WAIS@WAIS = YES +#WAISINC = -I../../../../freeWAIS-0.202/ir +@MAKE_WAIS@WAISCFLAGS = -DDIRECT_WAIS +# + +# add -DNEW_GATEWAY here for the new gateway config stuff +CFLAGS2 = $(CFLAGS) $(LYFLAGS) $(WAISCFLAGS) -I$(CMN) -DACCESS_AUTH + +COMPILE = $(ECHO) $(CC) $(CFLAGS2) -c + +COMMON = $(LOB)/HTParse$o $(LOB)/HTAccess$o $(LOB)/HTTP$o \ + $(LOB)/HTFile$o $(LOB)/HTBTree$o $(LOB)/HTFTP$o $(LOB)/HTTCP$o \ + $(LOB)/SGML$o $(LOB)/HTMLDTD$o $(LOB)/HTChunk$o \ + $(LOB)/HTPlain$o \ + $(LOB)/HTMLGen$o \ + $(LOB)/HTAtom$o $(LOB)/HTAnchor$o $(LOB)/HTStyle$o \ + $(LOB)/HTList$o $(LOB)/HTString$o \ + $(LOB)/HTRules$o $(LOB)/HTFormat$o $(LOB)/HTMIME$o \ + $(LOB)/HTNews$o $(LOB)/HTGopher$o \ + $(LOB)/HTTelnet$o $(LOB)/HTFinger$o $(LOB)/HTWSRC$o $(HTWAIS_o) \ + $(LOB)/HTAAUtil$o $(LOB)/HTAABrow$o \ + $(LOB)/HTGroup$o \ + $(LOB)/HTAAProt$o \ + $(LOB)/HTAssoc$o $(LOB)/HTLex$o $(LOB)/HTUU$o \ + $(LOB)/HTDOS$o + +CFILES = $(CMN)HTParse.c $(CMN)HTAccess.c $(CMN)HTTP.c $(CMN)HTFile.c \ + $(CMN)HTBTree.c \ + $(CMN)HTFTP.c $(CMN)HTTCP.c $(CMN)SGML.c \ + $(CMN)HTMLDTD.c \ + $(CMN)HTPlain.c \ + $(CMN)HTMLGen.c \ + $(CMN)HTChunk.c $(CMN)HTAtom.c $(CMN)HTAnchor.c $(CMN)HTStyle.c \ + $(CMN)HTList.c $(CMN)HTString.c $(CMN)HTRules.c \ + $(CMN)HTFormat.c $(CMN)HTMIME.c \ + $(CMN)HTNews.c $(CMN)HTGopher.c $(CMN)HTTelnet.c \ + $(CMN)HTFinger.c $(HTWAIS_c) $(CMN)HTWSRC.c \ + $(CMN)HTAABrow.c \ + $(CMN)HTGroup.c \ + 
$(CMN)HTAAProt.c \ + $(CMN)HTAssoc.c $(CMN)HTLex.c $(CMN)HTUU.c + +HFILES = $(CMN)HTParse.h $(CMN)HTAccess.h $(CMN)HTTP.h $(CMN)HTFile.h \ + $(CMN)HTBTree.h $(CMN)HTFTP.h $(CMN)HTTCP.h \ + $(CMN)SGML.h $(CMN)HTML.h $(CMN)HTMLDTD.h $(CMN)HTChunk.h \ + $(CMN)HTPlain.h \ + $(CMN)HTFWriter.h $(CMN)HTMLGen.h \ + $(CMN)HTStream.h \ + $(CMN)HTAtom.h $(CMN)HTAnchor.h $(CMN)HTStyle.h \ + $(CMN)HTList.h \ + $(CMN)HTString.h $(CMN)HTRules.h \ + $(CMN)HTFormat.h $(CMN)HTInit.h \ + $(CMN)HTMIME.h $(CMN)HTNews.h \ + $(CMN)HTGopher.h \ + $(CMN)HTUtils.h $(CMN)www_tcp.h $(CMN)HText.h \ + $(CMN)HTTelnet.h $(CMN)HTFinger.h \ + $(CMN)HTWAIS.h $(CMN)HTWSRC.h \ + $(CMN)HTAABrow.h \ + $(CMN)HTGroup.h \ + $(CMN)HTAAProt.h \ + $(CMN)HTAssoc.h $(CMN)HTLex.h $(CMN)HTUU.h + +C_SRC = $(COMMON:$o=.c) + +all : $(LOB)/libwww.a + +lint: + $(LINT) $(LINTOPTS) $(CPPOPTS) $(C_SRC) 2>&1 |tee ../../../lint.libwww + +.SUFFIXES: $o .i .h .html + +.c$o: + @RULE_CC@ + @ECHO_CC@$(CC) $(CPPOPTS) $(CFLAGS) -c $(srcdir)/$*.c + +.c.i: + @RULE_CC@ + @ECHO_CC@$(CPP) -C $(CPPOPTS) $(srcdir)/$*.c >$@ + +depend : + makedepend -fmakefile -- $(CFLAGS) -- $(CFILES) + +# Library +# +# On SGI, ranlib is unnecessary and does not exist so we ignore errors +# for that step +$(LOB)/libwww.a : $(COMMON) + $(AR) $(ARFLAGS) $(LOB)/libwww.a $(COMMON) + -$(RANLIB) $(LOB)/libwww.a + +# Clean up everything generatable except final products +clean : + rm -f core *.core *.leaks *.[oi] *.bak tags TAGS + rm -f dtd_util$x + rm -f $(LOB)/*$o + +distclean : clean + @echo made $@ + +tags: + $(CTAGS) *.[ch] + +# Common code +# ----------- + +$(LOB)/HTList$o : $(CMN)HTList.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTList.c + +$(LOB)/HTAnchor$o : $(CMN)HTAnchor.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTAnchor.c + +$(LOB)/HTFormat$o : $(CMN)HTFormat.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTFormat.c + +$(LOB)/HTMIME$o : $(CMN)HTMIME.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTMIME.c + 
+$(LOB)/HTNews$o : $(CMN)HTNews.c $(CMN)HTUtils.h $(CMN)HTList.h\ + $(CMN)HTMLDTD.h + $(COMPILE) $(CMN)HTNews.c + +$(LOB)/HTGopher$o : $(CMN)HTGopher.c $(CMN)HTUtils.h $(CMN)HTList.h \ + $(CMN)HTMLDTD.h + $(COMPILE) $(CMN)HTGopher.c + +$(LOB)/HTTelnet$o : $(CMN)HTTelnet.c $(CMN)HTUtils.h $(CMN)HTTelnet.h $(CMN)../../../userdefs.h + $(COMPILE) $(CMN)HTTelnet.c + +$(LOB)/HTFinger$o : $(CMN)HTFinger.c $(CMN)HTUtils.h $(CMN)HTList.h \ + $(CMN)HTMLDTD.h + $(COMPILE) $(CMN)HTFinger.c + +$(LOB)/HTStyle$o : $(CMN)HTStyle.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTStyle.c + +$(LOB)/HTAtom$o : $(CMN)HTAtom.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTAtom.c + +$(LOB)/HTChunk$o : $(CMN)HTChunk.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTChunk.c + +$(LOB)/HTString$o : $(CMN)HTString.c $(CMN)HTUtils.h $(CMN)Version.make + $(COMPILE) -DVC=\"$(VC)\" $(CMN)HTString.c + +$(LOB)/HTRules$o : $(CMN)HTRules.c $(CMN)HTUtils.h $(CMN)Version.make \ + $(CMN)HTAAProt.h + $(COMPILE) -DVC=\"$(VC)\" $(CMN)HTRules.c + +$(LOB)/SGML$o : $(CMN)SGML.c $(CMN)HTUtils.h $(CMN)UCAux.h + $(COMPILE) $(CMN)SGML.c + +$(LOB)/HTMLGen$o : $(CMN)HTMLGen.c $(CMN)HTUtils.h $(CMN)HTMLDTD.h + $(COMPILE) $(CMN)HTMLGen.c + +$(LOB)/HTMLDTD$o : $(CMN)HTMLDTD.c $(CMN)SGML.h + $(COMPILE) $(CMN)HTMLDTD.c + +$(LOB)/HTPlain$o : $(CMN)HTPlain.c $(CMN)HTPlain.h $(CMN)HTStream.h \ + $(CMN)UCAux.h + $(COMPILE) $(CMN)HTPlain.c + +$(LOB)/HTWAIS$o : $(CMN)HTWAIS.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(WAISINC) $(CMN)HTWAIS.c + +$(LOB)/HTWSRC$o : $(CMN)HTWSRC.c $(CMN)HTUtils.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTWSRC.c + +# Access Authorization + +$(LOB)/HTAAUtil$o : $(CMN)HTAAUtil.c $(CMN)HTAAUtil.h \ + $(CMN)HTUtils.h $(CMN)HTString.h + $(COMPILE) $(CMN)HTAAUtil.c + +$(LOB)/HTGroup$o : $(CMN)HTGroup.c $(CMN)HTGroup.h \ + $(CMN)HTAAUtil.h \ + $(CMN)HTAssoc.h $(CMN)HTLex.h + $(COMPILE) $(CMN)HTGroup.c + +$(LOB)/HTAABrow$o : $(CMN)HTAABrow.c $(CMN)HTAABrow.h \ + $(CMN)HTAAUtil.h $(CMN)HTUU.h \ + $(CMN)HTUtils.h 
$(CMN)HTString.h \ + $(CMN)HTParse.h $(CMN)HTList.h \ + $(CMN)HTAssoc.h + $(COMPILE) $(CMN)HTAABrow.c + +$(LOB)/HTAAProt$o : $(CMN)HTAAProt.c $(CMN)HTAAProt.h \ + $(CMN)HTUtils.h $(CMN)HTAAUtil.h \ + $(CMN)HTAssoc.h $(CMN)HTLex.h + $(COMPILE) $(CMN)HTAAProt.c + +$(LOB)/HTAssoc$o : $(CMN)HTAssoc.c $(CMN)HTAssoc.h \ + $(CMN)HTUtils.h $(CMN)HTString.h $(CMN)HTList.h + $(COMPILE) $(CMN)HTAssoc.c + +$(LOB)/HTLex$o : $(CMN)HTLex.c $(CMN)HTLex.h $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTLex.c + +$(LOB)/HTUU$o : $(CMN)HTUU.c $(CMN)HTUU.h $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTUU.c + + +# Communications & Files + +$(LOB)/HTTP$o : $(CMN)HTTP.c $(CMN)HTUtils.h $(CMN)HTAABrow.h + $(COMPILE) $(CMN)HTTP.c + +$(LOB)/HTTCP$o : $(CMN)HTTCP.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTTCP.c + +$(LOB)/HTFile$o : $(CMN)HTFile.c $(CMN)HTUtils.h \ + $(CMN)HTMLDTD.h + $(COMPILE) $(CMN)HTFile.c + +$(LOB)/HTBTree$o : $(CMN)HTBTree.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTBTree.c + +$(LOB)/HTFTP$o : $(CMN)HTFTP.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTFTP.c + +$(LOB)/HTAccess$o : $(CMN)HTAccess.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTAccess.c + +$(LOB)/HTParse$o : $(CMN)HTParse.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTParse.c + +$(LOB)/HTVMS_WaisUI$o : $(CMN)HTVMS_WaisUI.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTVMS_WaisUI.c + +$(LOB)/HTDOS$o : $(CMN)HTDOS.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)HTDOS.c + +# Utilities +$(LOB)/dtd_util$o : $(CMN)dtd_util.c $(CMN)HTUtils.h + $(COMPILE) $(CMN)dtd_util.c + +DTD_UTIL = $(LOB)/dtd_util$o $(LOB)/HTMLDTD$o + +sources: dtd_util$x src0_HTMLDTD.txt src1_HTMLDTD.txt + -rm -f *_HTMLDTD.h + ./dtd_util$x -l src0_HTMLDTD.txt -s -c -o src0_HTMLDTD.h + ./dtd_util$x -l src1_HTMLDTD.txt -t -c -o src1_HTMLDTD.h + ./dtd_util$x -l src0_HTMLDTD.txt -s -h -o hdr_HTMLDTD.h + +dtd_util$x: $(DTD_UTIL) + $(CC) $(CC_OPTS) $(LDFLAGS) -o $@ $(DTD_UTIL) $(LIBS) + +check: dtd_util$x + @echo "** comparing builtin src0_HTMLDTD.txt" + ./dtd_util$x >HTMLDTD.log + -diff -u src0_HTMLDTD.txt HTMLDTD.log + 
+ @echo "** comparing reloaded src0_HTMLDTD.txt" + ./dtd_util$x -l src0_HTMLDTD.txt >HTMLDTD.log + -diff -u src0_HTMLDTD.txt HTMLDTD.log + + @echo "** comparing header generated from builtin" + ./dtd_util$x -s -h -o HTMLDTD.log + -diff -u hdr_HTMLDTD.h HTMLDTD.log + ./dtd_util$x -t -h -o HTMLDTD.log + -diff -u hdr_HTMLDTD.h HTMLDTD.log + + @echo "** comparing header generated by load" + ./dtd_util$x -s -h -o HTMLDTD.log -l src0_HTMLDTD.txt + -diff -u hdr_HTMLDTD.h HTMLDTD.log + ./dtd_util$x -t -h -o HTMLDTD.log -l src1_HTMLDTD.txt + -diff -u hdr_HTMLDTD.h HTMLDTD.log + + @echo "** comparing strict source generated from builtin" + ./dtd_util$x -s -c -o HTMLDTD.log + -diff -u src0_HTMLDTD.h HTMLDTD.log + + @echo "** comparing strict source generated by load" + ./dtd_util$x -s -c -o HTMLDTD.log -l src0_HTMLDTD.txt + -diff -u src0_HTMLDTD.h HTMLDTD.log + + @echo "** comparing tagsoup source generated from builtin" + ./dtd_util$x -t -c -o HTMLDTD.log + -diff -u src1_HTMLDTD.h HTMLDTD.log + + @echo "** comparing tagsoup source generated by load" + ./dtd_util$x -t -c -o HTMLDTD.log -l src1_HTMLDTD.txt + -diff -u src1_HTMLDTD.h HTMLDTD.log + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
diff --git a/WWW/Library/Implementation/src0_HTMLDTD.h b/WWW/Library/Implementation/src0_HTMLDTD.h new file mode 100644 index 00000000..83884709 --- /dev/null +++ b/WWW/Library/Implementation/src0_HTMLDTD.h @@ -0,0 +1,2422 @@ +/* $LynxId: src0_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ +#ifndef src_HTMLDTD_H0 +#define src_HTMLDTD_H0 1 + +#ifndef once_HTMLDTD +#define once_HTMLDTD 1 + +#define T_A 0x00008,0x0B007,0x0FF17,0x37787,0x77BA7,0x8604F,0x00014 +#define T_ABBR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_ACRONYM 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_ADDRESS 0x00200,0x0F14F,0x8FFFF,0x36680,0xB6FAF,0x80317,0x00000 +#define T_APPLET 0x02000,0x0B0CF,0x8FFFF,0x37F9F,0xB7FBF,0x8300F,0x00000 +#define T_AREA 0x08000,0x00000,0x00000,0x08000,0x3FFFF,0x00F1F,0x00001 +#define T_AU 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_AUTHOR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_B 0x00001,0x8B04F,0xAFFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_BANNER 0x00200,0x0FB8F,0x0FFFF,0x30000,0x30000,0x8031F,0x00000 +#define T_BASE 0x40000,0x00000,0x00000,0x50000,0x50000,0x8000F,0x00001 +#define T_BASEFONT 0x01000,0x00000,0x00000,0x377AF,0x37FAF,0x8F000,0x00001 +#define T_BDO 0x00100,0x0B04F,0x8FFFF,0x36680,0xB6FAF,0x0033F,0x00000 +#define T_BGSOUND 0x01000,0x00000,0x00000,0x777AF,0x77FAF,0x8730F,0x00001 +#define T_BIG 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_BLINK 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00001,0x00014 +#define T_BLOCKQUOTE 0x00200,0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 +#define T_BODY 0x20000,0x2FB8F,0x2FFFF,0x30000,0x30000,0xDFF7F,0x00003 +#define T_BODYTEXT 0x20000,0x0FB8F,0xAFFFF,0x30200,0xB7FAF,0x8F17F,0x00003 +#define T_BQ 0x00200,0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 +#define T_BR 0x01000,0x00000,0x00000,0x377BF,0x77FBF,0x8101F,0x00001 +#define T_BUTTON 
0x02000,0x0BB07,0x0FF37,0x0378F,0x37FBF,0x8115F,0x00000 +#define T_CAPTION 0x00100,0x0B04F,0x8FFFF,0x06A00,0xB6FA7,0x8035F,0x00000 +#define T_CENTER 0x00200,0x8FBCF,0x8FFFF,0xB6680,0xB6FA7,0x8071F,0x00000 +#define T_CITE 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00010 +#define T_CODE 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00000 +#define T_COL 0x04000,0x00000,0x00000,0x00820,0x36FA7,0x88F5F,0x00001 +#define T_COLGROUP 0x00020,0x04000,0x04000,0x00800,0x36FA7,0x8875F,0x00001 +#define T_COMMENT 0x00004,0x00000,0x00000,0xA77AF,0x7FFFF,0x00003,0x00000 +#define T_CREDIT 0x00100,0x0B04F,0x8FFFF,0x06A00,0xB7FBF,0x8030F,0x00000 +#define T_DD 0x00400,0x0FBCF,0x8FFFF,0x00800,0xB6FFF,0x8071F,0x00001 +#define T_DEL 0x00002,0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 +#define T_DFN 0x00002,0x8B0CF,0x8FFFF,0x8778F,0xF7FBF,0x00003,0x00000 +#define T_DIR 0x00800,0x0B400,0x0F75F,0x37680,0x36FB7,0x84F7F,0x00000 +#define T_DIV 0x00200,0x8FBCF,0x8FFFF,0xB66A0,0xB7FFF,0x8031F,0x00004 +#define T_DL 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FB7,0x0075F,0x00000 +#define T_DLC 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FB7,0x0075F,0x00000 +#define T_DT 0x00400,0x0B04F,0x0B1FF,0x00800,0x17FFF,0x8071F,0x00001 +#define T_EM 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00010 +#define T_EMBED 0x02000,0x8F107,0x8FFF7,0xB6FBF,0xB7FBF,0x1FF7F,0x00001 +#define T_FIELDSET 0x00200,0x8FB4F,0x8FF7F,0x86787,0xB7FF7,0x8805F,0x00000 +#define T_FIG 0x00200,0x0FB00,0x8FFFF,0x36680,0xB6FBF,0x8834F,0x00000 +#define T_FN 0x00200,0x8FBCF,0x8FFFF,0xB6680,0xB7EBF,0x8114F,0x00000 +#define T_FONT 0x00001,0x8B04F,0x8FFFF,0xB778F,0xF7FBF,0x00001,0x00014 +#define T_FORM 0x00080,0x0FF6F,0x0FF7F,0x36E07,0x32F07,0x88DFF,0x00000 +#define T_FRAME 0x10000,0x00000,0x00000,0x10000,0x10000,0x9FFFF,0x00001 +#define T_FRAMESET 0x10000,0x90000,0x90000,0x90000,0x93000,0x9FFFF,0x00000 +#define T_H1 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H2 
0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H3 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H4 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H5 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H6 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FF7F,0x00007 +#define T_HR 0x04000,0x00000,0x00000,0x3FE80,0x3FFBF,0x87F37,0x00001 +#define T_HTML 0x10000,0x7FB8F,0x7FFFF,0x00000,0x00000,0x1FFFF,0x00003 +#define T_HY 0x01000,0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 +#define T_I 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_IFRAME 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0xD315F,0x00000 +#define T_IMG 0x01000,0x00000,0x00000,0x3779F,0x37FBF,0x80000,0x00001 +#define T_INPUT 0x00040,0x00000,0x00000,0x03F87,0x37F87,0x8904F,0x00001 +#define T_INS 0x00002,0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 +#define T_ISINDEX 0x08000,0x00000,0x00000,0x7778F,0x7FFAF,0x80007,0x00001 +#define T_KBD 0x00002,0x00000,0x00000,0x2778F,0x77FBF,0x00003,0x00000 +#define T_KEYGEN 0x00040,0x00000,0x00000,0x07FB7,0x37FB7,0x80070,0x00001 +#define T_LABEL 0x00002,0x0304F,0x0FFFF,0x0679F,0x36FBF,0x00007,0x00000 +#define T_LEGEND 0x00002,0x0B04F,0x8FF7F,0x00200,0xB7FA7,0x00003,0x00000 +#define T_LH 0x00400,0x0BB7F,0x8FFFF,0x00800,0x97FFF,0x8071F,0x00001 +#define T_LI 0x00400,0x0BBFF,0x8FFFF,0x00800,0x97FFF,0x8071F,0x00001 +#define T_LINK 0x08000,0x00000,0x00000,0x50000,0x50000,0x0FF7F,0x00001 +#define T_LISTING 0x00800,0x00000,0x00000,0x36600,0x36F00,0x80F1F,0x00000 +#define T_MAP 0x08000,0x08000,0x08000,0x37FCF,0x37FBF,0x0051F,0x00000 +#define T_MARQUEE 0x04000,0x0000F,0x8F01F,0x37787,0xB7FA7,0x8301C,0x00000 +#define T_MATH 0x00004,0x0B05F,0x8FFFF,0x2778F,0xF7FBF,0x0001F,0x00000 +#define T_MENU 0x00800,0x0B400,0x0F75F,0x17680,0x36FB7,0x88F7F,0x00000 +#define T_META 0x08000,0x00000,0x00000,0x50000,0x50000,0x0FF7F,0x00001 
+#define T_NEXTID 0x01000,0x00000,0x00000,0x50000,0x1FFF7,0x00001,0x00001 +#define T_NOFRAMES 0x20000,0x2FB8F,0x0FFFF,0x17000,0x17000,0x0CF5F,0x00000 +#define T_NOTE 0x00200,0x0BBAF,0x8FFFF,0x376B0,0xB7FFF,0x8031F,0x00000 +#define T_OBJECT 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83D5F,0x00020 +#define T_OL 0x00800,0x0C400,0x8FFFF,0x37680,0xB7FB7,0x88F7F,0x00000 +#define T_OPTION 0x08000,0x00000,0x00000,0x00040,0x37FFF,0x8031F,0x00001 +#define T_OVERLAY 0x04000,0x00000,0x00000,0x00200,0x37FBF,0x83F7F,0x00001 +#define T_P 0x00100,0x0B04F,0x8FFFF,0x36680,0xB6FA7,0x80117,0x00001 +#define T_PARAM 0x01000,0x00000,0x00000,0x33500,0x37FFF,0x81560,0x00001 +#define T_PLAINTEXT 0x10000,0xFFFFF,0xFFFFF,0x90000,0x90000,0x3FFFF,0x00001 +#define T_PRE 0x00200,0x0F04F,0x0F05E,0x36680,0x36FF0,0x8071E,0x00000 +#define T_Q 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00000 +#define T_S 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_SAMP 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00010 +#define T_SCRIPT 0x02000,0x00000,0x00000,0x77F9F,0x77FFF,0x87D5F,0x00000 +#define T_SELECT 0x00040,0x08000,0x08000,0x03FAF,0x33FBF,0x80D5F,0x00008 +#define T_SHY 0x01000,0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 +#define T_SMALL 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_SPAN 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x80003,0x00000 +#define T_SPOT 0x00008,0x00000,0x00000,0x3FFF7,0x3FFF7,0x00008,0x00001 +#define T_STRIKE 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_STRONG 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00010 +#define T_STYLE 0x40000,0x00000,0x00000,0x7638F,0x76FAF,0x8001F,0x00000 +#define T_SUB 0x00004,0x8B05F,0x8FFFF,0x8779F,0xF7FBF,0x00007,0x00000 +#define T_SUP 0x00004,0x8B05F,0x8FFFF,0x8779F,0xF7FBF,0x00007,0x00000 +#define T_TAB 0x01000,0x00000,0x00000,0x3778F,0x57FAF,0x00001,0x00001 +#define T_TABLE 0x00800,0x0F1E0,0x8FFFF,0x36680,0xB6FA7,0x8C57F,0x00000 +#define T_TBODY 
0x00020,0x00020,0x8FFFF,0x00880,0xB7FB7,0x8C75F,0x00003 +#define T_TD 0x00400,0x0FBCF,0x8FFFF,0x00020,0xB7FB7,0x8C75F,0x00001 +#define T_TEXTAREA 0x00040,0x00000,0x00000,0x07F8F,0x33FBF,0x80D5F,0x00040 +#define T_TEXTFLOW 0x20000,0x8FBFF,0x9FFFF,0x977B0,0xB7FB7,0x9B00F,0x00003 +#define T_TFOOT 0x00020,0x00020,0x8FFFF,0x00800,0xB7FB7,0x8CF5F,0x00001 +#define T_TH 0x00400,0x0FBCF,0x0FFFF,0x00020,0xB7FB7,0x8CF5F,0x00001 +#define T_THEAD 0x00020,0x00020,0x8FFFF,0x00800,0xB7FB7,0x8CF5F,0x00001 +#define T_TITLE 0x40000,0x00000,0x00000,0x50000,0x50000,0x0031F,0x0000C +#define T_TR 0x00020,0x00400,0x8FFFF,0x00820,0xB7FB7,0x8C75F,0x00001 +#define T_TT 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00010 +#define T_U 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_UL 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FFF,0x8075F,0x00000 +#define T_VAR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_WBR 0x00001,0x00000,0x00000,0x3778F,0x77FBF,0x8101F,0x00001 +#define T_XMP 0x00800,0x00000,0x00000,0x367E0,0x36FFF,0x0875F,0x00001 +#define T_OBJECT_PCDATA 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83D5F,0x00008 +#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000 +#ifdef USE_PRETTYSRC +# define N HTMLA_NORMAL +# define i HTMLA_ANAME +# define h HTMLA_HREF +# define c HTMLA_CLASS +# define x HTMLA_AUXCLASS +# define T(t) , t +#else +# define T(t) /*nothing */ +#endif +/* *INDENT-OFF* */ + +#define ATTR_TYPE(name) #name, name##_attr_list + +/* generic attributes, used in different tags */ +static const attr core_attr_list[] = { + { "CLASS" T(c) }, + { "ID" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType core_attr_type[] = { + { ATTR_TYPE(core) }, + { 0, 0 }, +}; + +static const attr i18n_attr_list[] = { + { "DIR" T(N) }, + { "LANG" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType i18n_attr_type[] = { + { ATTR_TYPE(i18n) }, + { 0, 0 }, +}; + +static 
const attr events_attr_list[] = { + { "ONCLICK" T(N) }, + { "ONDBLCLICK" T(N) }, + { "ONKEYDOWN" T(N) }, + { "ONKEYPRESS" T(N) }, + { "ONKEYUP" T(N) }, + { "ONMOUSEDOWN" T(N) }, + { "ONMOUSEMOVE" T(N) }, + { "ONMOUSEOUT" T(N) }, + { "ONMOUSEOVER" T(N) }, + { "ONMOUSEUP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType events_attr_type[] = { + { ATTR_TYPE(events) }, + { 0, 0 }, +}; + +static const attr align_attr_list[] = { + { "ALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType align_attr_type[] = { + { ATTR_TYPE(align) }, + { 0, 0 }, +}; + +static const attr cellalign_attr_list[] = { + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "VALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType cellalign_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { 0, 0 }, +}; + +static const attr bgcolor_attr_list[] = { + { "BGCOLOR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType bgcolor_attr_type[] = { + { ATTR_TYPE(bgcolor) }, + { 0, 0 }, +}; + + +/* tables defining attributes per-tag in terms of generic attributes (editable) */ +static const attr A_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CHARSET" T(N) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ISMAP" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "SHAPE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TYPE" T(N) }, + { "URN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType A_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(A) }, + { 0, 0 }, +}; + +static const attr ADDRESS_attr_list[] = { + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType ADDRESS_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(ADDRESS) }, + { 
0, 0 }, +}; + +static const attr APPLET_attr_list[] = { + { "ALT" T(N) }, + { "CLEAR" T(N) }, + { "CODE" T(N) }, + { "CODEBASE" T(h) }, + { "DOWNLOAD" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "NAME" T(i) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType APPLET_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(APPLET) }, + { 0, 0 }, +}; + +static const attr AREA_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "HREF" T(h) }, + { "NOHREF" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "SHAPE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType AREA_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(AREA) }, + { 0, 0 }, +}; + +static const attr BASE_attr_list[] = { + { "HREF" T(h) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BASE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(BASE) }, + { 0, 0 }, +}; + +static const attr BGSOUND_attr_list[] = { + { "CLEAR" T(N) }, + { "LOOP" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BGSOUND_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BGSOUND) }, + { 0, 0 }, +}; + +static const attr BODY_attr_list[] = { + { "ALINK" T(N) }, + { "BACKGROUND" T(h) }, + { "CLEAR" T(N) }, + { "LINK" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "TEXT" T(N) }, + { "VLINK" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BODY_attr_type[] = { + { ATTR_TYPE(bgcolor) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BODY) }, + { 0, 0 }, +}; + +static const attr BODYTEXT_attr_list[] = { + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { 
"TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BODYTEXT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BODYTEXT) }, + { 0, 0 }, +}; + +static const attr BQ_attr_list[] = { + { "CITE" T(h) }, + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BQ_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BQ) }, + { 0, 0 }, +}; + +static const attr BUTTON_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "NAME" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "READONLY" T(N) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BUTTON_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BUTTON) }, + { 0, 0 }, +}; + +static const attr CAPTION_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType CAPTION_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(CAPTION) }, + { 0, 0 }, +}; + +static const attr COL_attr_list[] = { + { "CLEAR" T(N) }, + { "SPAN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType COL_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(COL) }, + { 0, 0 }, +}; + +static const attr DEL_attr_list[] = { + { "CITE" T(N) }, + { "DATETIME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType DEL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DEL) }, + { 0, 0 }, +}; + +static const attr DIV_attr_list[] = { + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType 
DIV_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DIV) }, + { 0, 0 }, +}; + +static const attr DL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType DL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DL) }, + { 0, 0 }, +}; + +static const attr EMBED_attr_list[] = { + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "ISMAP" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOFLOW" T(N) }, + { "PARAMS" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "USEMAP" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType EMBED_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(EMBED) }, + { 0, 0 }, +}; + +static const attr FIG_attr_list[] = { + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "MD" T(N) }, + { "NOFLOW" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FIG_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FIG) }, + { 0, 0 }, +}; + +static const attr FONT_attr_list[] = { + { "CLEAR" T(N) }, + { "COLOR" T(N) }, + { "END" T(N) }, + { "FACE" T(N) }, + { "SIZE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FONT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FONT) }, + { 0, 0 }, +}; + +static const attr FORM_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACTION" T(h) }, + { "CLEAR" T(N) }, + { "ENCTYPE" T(N) }, + { "METHOD" T(N) }, + { "ONRESET" T(N) }, + { "ONSUBMIT" T(N) }, + { "SCRIPT" T(N) }, + { "SUBJECT" T(N) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType 
FORM_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FORM) }, + { 0, 0 }, +}; + +static const attr FRAME_attr_list[] = { + { "FRAMEBORDER" T(N) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "NORESIZE" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FRAME_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(FRAME) }, + { 0, 0 }, +}; + +static const attr FRAMESET_attr_list[] = { + { "COLS" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "ROWS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FRAMESET_attr_type[] = { + { ATTR_TYPE(FRAMESET) }, + { 0, 0 }, +}; + +static const attr GEN_attr_list[] = { + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType GEN_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(GEN) }, + { 0, 0 }, +}; + +static const attr H_attr_list[] = { + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "NOWRAP" T(N) }, + { "SEQNUM" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType H_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(H) }, + { 0, 0 }, +}; + +static const attr HR_attr_list[] = { + { "CLEAR" T(N) }, + { "MD" T(N) }, + { "NOSHADE" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType HR_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(HR) }, + { 0, 0 }, +}; + +static const attr IFRAME_attr_list[] = { + { "FRAMEBORDER" T(N) }, + { "HEIGHT" T(N) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* 
Terminate list */ +}; + +static const AttrType IFRAME_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(IFRAME) }, + { 0, 0 }, +}; + +static const attr IMG_attr_list[] = { + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ISMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LONGDESC" T(h) }, + { "MD" T(N) }, + { "NAME" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType IMG_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(IMG) }, + { 0, 0 }, +}; + +static const attr INPUT_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CHECKED" T(N) }, + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ISMAP" T(N) }, + { "MAX" T(N) }, + { "MAXLENGTH" T(N) }, + { "MD" T(N) }, + { "MIN" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(N) }, + { "VALUE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType INPUT_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(INPUT) }, + { 0, 0 }, +}; + +static const attr ISINDEX_attr_list[] = { + { "ACTION" T(h) }, + { "HREF" T(h) }, + { "PROMPT" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType ISINDEX_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(ISINDEX) }, + { 0, 0 }, +}; + +static const attr KEYGEN_attr_list[] = { + { "CHALLENGE" T(N) }, + { "NAME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType 
KEYGEN_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(KEYGEN) }, + { 0, 0 }, +}; + +static const attr LABEL_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "FOR" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LABEL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LABEL) }, + { 0, 0 }, +}; + +static const attr LI_attr_list[] = { + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LI_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LI) }, + { 0, 0 }, +}; + +static const attr LINK_attr_list[] = { + { "CHARSET" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "MEDIA" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "TARGET" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LINK_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LINK) }, + { 0, 0 }, +}; + +static const attr MAP_attr_list[] = { + { "CLEAR" T(N) }, + { "NAME" T(i) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType MAP_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(MAP) }, + { 0, 0 }, +}; + +static const attr MATH_attr_list[] = { + { "BOX" T(N) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType MATH_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(MATH) }, + { 0, 0 }, +}; + +static const attr META_attr_list[] = { + { "CHARSET" T(N) }, + { "CONTENT" T(N) }, + { "HTTP-EQUIV" T(N) }, + { "NAME" T(N) }, + { "SCHEME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType META_attr_type[] = { + { ATTR_TYPE(META) }, + { 0, 0 }, +}; + +static 
const attr NEXTID_attr_list[] = { + { "N" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType NEXTID_attr_type[] = { + { ATTR_TYPE(NEXTID) }, + { 0, 0 }, +}; + +static const attr NOTE_attr_list[] = { + { "CLEAR" T(N) }, + { "MD" T(N) }, + { "ROLE" T(x) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType NOTE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(NOTE) }, + { 0, 0 }, +}; + +static const attr OBJECT_attr_list[] = { + { "ARCHIVE" T(N) }, + { "BORDER" T(N) }, + { "CLASSID" T(h) }, + { "CODEBASE" T(h) }, + { "CODETYPE" T(N) }, + { "DATA" T(h) }, + { "DECLARE" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ISMAP" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "SHAPES" T(N) }, + { "STANDBY" T(N) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OBJECT_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(OBJECT) }, + { 0, 0 }, +}; + +static const attr OL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "CONTINUE" T(N) }, + { "SEQNUM" T(N) }, + { "START" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(OL) }, + { 0, 0 }, +}; + +static const attr OPTION_attr_list[] = { + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "LABEL" T(N) }, + { "SELECTED" T(N) }, + { "SHAPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OPTION_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(OPTION) }, + { 0, 0 }, +}; + +static const attr OVERLAY_attr_list[] = { + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "MD" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { 
"WIDTH" T(N) }, + { "X" T(N) }, + { "Y" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OVERLAY_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(OVERLAY) }, + { 0, 0 }, +}; + +static const attr P_attr_list[] = { + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType P_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(P) }, + { 0, 0 }, +}; + +static const attr PARAM_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCEPT-ENCODING" T(N) }, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUEREF" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType PARAM_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(PARAM) }, + { 0, 0 }, +}; + +static const attr Q_attr_list[] = { + { "CITE" T(h) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType Q_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(Q) }, + { 0, 0 }, +}; + +static const attr SCRIPT_attr_list[] = { + { "CHARSET" T(N) }, + { "CLEAR" T(N) }, + { "DEFER" T(N) }, + { "EVENT" T(N) }, + { "FOR" T(N) }, + { "LANGUAGE" T(N) }, + { "NAME" T(N) }, + { "SCRIPTENGINE" T(N) }, + { "SRC" T(h) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType SCRIPT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(SCRIPT) }, + { 0, 0 }, +}; + +static const attr SELECT_attr_list[] = { + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "MD" T(N) }, + { "MULTIPLE" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "SIZE" T(N) }, + { "TABINDEX" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ 
+}; + +static const AttrType SELECT_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(SELECT) }, + { 0, 0 }, +}; + +static const attr STYLE_attr_list[] = { + { "MEDIA" T(N) }, + { "NOTATION" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType STYLE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(STYLE) }, + { 0, 0 }, +}; + +static const attr TAB_attr_list[] = { + { "CLEAR" T(N) }, + { "DP" T(N) }, + { "INDENT" T(N) }, + { "TO" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TAB_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TAB) }, + { 0, 0 }, +}; + +static const attr TABLE_attr_list[] = { + { "BACKGROUND" T(h) }, + { "BORDER" T(N) }, + { "CELLPADDING" T(N) }, + { "CELLSPACING" T(N) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "COLSPEC" T(N) }, + { "DP" T(N) }, + { "FRAME" T(N) }, + { "NOFLOW" T(N) }, + { "NOWRAP" T(N) }, + { "RULES" T(N) }, + { "SUMMARY" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TABLE_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TABLE) }, + { 0, 0 }, +}; + +static const attr TD_attr_list[] = { + { "ABBR" T(N) }, + { "AXES" T(N) }, + { "AXIS" T(N) }, + { "BACKGROUND" T(h) }, + { "CLEAR" T(N) }, + { "COLSPAN" T(N) }, + { "DP" T(N) }, + { "HEADERS" T(N) }, + { "HEIGHT" T(N) }, + { "NOWRAP" T(N) }, + { "ROWSPAN" T(N) }, + { "SCOPE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TD_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TD) }, + { 0, 0 }, +}; + +static const attr TEXTAREA_attr_list[] = { + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "DISABLED" T(N) }, + { 
"ERROR" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "ROWS" T(N) }, + { "TABINDEX" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TEXTAREA_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TEXTAREA) }, + { 0, 0 }, +}; + +static const attr TR_attr_list[] = { + { "CLEAR" T(N) }, + { "DP" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TR_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TR) }, + { 0, 0 }, +}; + +static const attr UL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "PLAIN" T(N) }, + { "SRC" T(h) }, + { "TYPE" T(N) }, + { "WRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType UL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(UL) }, + { 0, 0 }, +}; + + +/* attribute lists for the runtime (generated by dtd_util) */ +static const attr A_attr[] = { /* A attributes */ + { "ACCESSKEY" T(N) }, + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "URN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr ADDRESS_attr[] = { /* ADDRESS attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) 
} /* Terminate list */ +}; + +static const attr APPLET_attr[] = { /* APPLET attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "CODE" T(N) }, + { "CODEBASE" T(h) }, + { "DIR" T(N) }, + { "DOWNLOAD" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr AREA_attr[] = { /* AREA attributes */ + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOHREF" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BASE_attr[] = { /* BASE attributes */ + { "CLASS" T(c) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "STYLE" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BGSOUND_attr[] = { /* BGSOUND attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LOOP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BODY_attr[] = { /* BODY attributes */ + { "ALINK" T(N) }, + { "BACKGROUND" T(h) }, + { "BGCOLOR" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LINK" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "STYLE" T(N) }, + { "TEXT" T(N) }, + { "TITLE" T(N) }, + { "VLINK" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BODYTEXT_attr[] = { /* BODYTEXT attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { 
"NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BQ_attr[] = { /* BLOCKQUOTE attributes */ + { "CITE" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BUTTON_attr[] = { /* BUTTON attributes */ + { "ACCESSKEY" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "READONLY" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr CAPTION_attr[] = { /* CAPTION attributes */ + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr COL_attr[] = { /* COL attributes */ + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "SPAN" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr DEL_attr[] = { /* DEL attributes */ + { "CITE" T(N) }, + { "CLASS" T(c) }, + { "DATETIME" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr DIV_attr[] = { /* CENTER attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 
0 T(N) } /* Terminate list */ +}; + +static const attr DL_attr[] = { /* DL attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr EMBED_attr[] = { /* EMBED attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOFLOW" T(N) }, + { "PARAMS" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "USEMAP" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FIG_attr[] = { /* FIG attributes */ + { "ALIGN" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOFLOW" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FONT_attr[] = { /* BASEFONT attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLOR" T(N) }, + { "DIR" T(N) }, + { "END" T(N) }, + { "FACE" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "SIZE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FORM_attr[] = { /* FORM attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACTION" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ENCTYPE" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "METHOD" T(N) }, + { "ONRESET" T(N) }, + { "ONSUBMIT" T(N) }, + { "SCRIPT" T(N) }, + { "STYLE" T(N) }, + { "SUBJECT" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + 
+static const attr FRAME_attr[] = { /* FRAME attributes */ + { "CLASS" T(c) }, + { "FRAMEBORDER" T(N) }, + { "ID" T(i) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "NORESIZE" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FRAMESET_attr[] = { /* FRAMESET attributes */ + { "COLS" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "ROWS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr GEN_attr[] = { /* ABBR attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr H_attr[] = { /* H1 attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOWRAP" T(N) }, + { "SEQNUM" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr HR_attr[] = { /* HR attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOSHADE" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr IFRAME_attr[] = { /* IFRAME attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "FRAMEBORDER" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr IMG_attr[] = { /* IMG attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "BORDER" T(N) 
}, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LANG" T(N) }, + { "LONGDESC" T(h) }, + { "MD" T(N) }, + { "NAME" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr INPUT_attr[] = { /* INPUT attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "CHECKED" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "MAX" T(N) }, + { "MAXLENGTH" T(N) }, + { "MD" T(N) }, + { "MIN" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(N) }, + { "VALUE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr ISINDEX_attr[] = { /* ISINDEX attributes */ + { "ACTION" T(h) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "PROMPT" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr KEYGEN_attr[] = { /* KEYGEN attributes */ + { "CHALLENGE" T(N) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LABEL_attr[] = { /* LABEL attributes */ + { "ACCESSKEY" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "FOR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "ONBLUR" T(N) }, + { 
"ONFOCUS" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LI_attr[] = { /* LI attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LINK_attr[] = { /* LINK attributes */ + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MEDIA" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "STYLE" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr MAP_attr[] = { /* MAP attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr MATH_attr[] = { /* MATH attributes */ + { "BOX" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, + { "CONTENT" T(N) }, + { "HTTP-EQUIV" T(N) }, + { "NAME" T(N) }, + { "SCHEME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr NEXTID_attr[] = { /* NEXTID attributes */ + { "N" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr NOTE_attr[] = { /* NOTE attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "ROLE" T(x) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OBJECT_attr[] = { /* OBJECT attributes */ + { "ALIGN" T(N) }, + 
{ "ARCHIVE" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLASSID" T(h) }, + { "CODEBASE" T(h) }, + { "CODETYPE" T(N) }, + { "DATA" T(h) }, + { "DECLARE" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "SHAPES" T(N) }, + { "STANDBY" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OL_attr[] = { /* OL attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "CONTINUE" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "SEQNUM" T(N) }, + { "START" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OPTION_attr[] = { /* OPTION attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "ID" T(i) }, + { "LABEL" T(N) }, + { "LANG" T(N) }, + { "SELECTED" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OVERLAY_attr[] = { /* OVERLAY attributes */ + { "CLASS" T(c) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "MD" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { "X" T(N) }, + { "Y" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr P_attr[] = { /* P attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr PARAM_attr[] = { /* PARAM attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCEPT-ENCODING" T(N) }, + { "CLASS" T(c) 
}, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUEREF" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr Q_attr[] = { /* Q attributes */ + { "CITE" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr SCRIPT_attr[] = { /* SCRIPT attributes */ + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DEFER" T(N) }, + { "DIR" T(N) }, + { "EVENT" T(N) }, + { "FOR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LANGUAGE" T(N) }, + { "NAME" T(N) }, + { "SCRIPTENGINE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr SELECT_attr[] = { /* SELECT attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "MULTIPLE" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "SIZE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr STYLE_attr[] = { /* STYLE attributes */ + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MEDIA" T(N) }, + { "NOTATION" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TAB_attr[] = { /* TAB attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "ID" T(i) }, + { "INDENT" T(N) }, + { 
"LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TO" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TABLE_attr[] = { /* TABLE attributes */ + { "ALIGN" T(N) }, + { "BACKGROUND" T(h) }, + { "BORDER" T(N) }, + { "CELLPADDING" T(N) }, + { "CELLSPACING" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "COLSPEC" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "FRAME" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOFLOW" T(N) }, + { "NOWRAP" T(N) }, + { "RULES" T(N) }, + { "STYLE" T(N) }, + { "SUMMARY" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TD_attr[] = { /* TD attributes */ + { "ABBR" T(N) }, + { "ALIGN" T(N) }, + { "AXES" T(N) }, + { "AXIS" T(N) }, + { "BACKGROUND" T(h) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLSPAN" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "HEADERS" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "ROWSPAN" T(N) }, + { "SCOPE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TEXTAREA_attr[] = { /* TEXTAREA attributes */ + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "ROWS" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TR_attr[] = { /* TBODY attributes */ + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "ID" T(i) }, + 
{ "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr UL_attr[] = { /* DIR attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "PLAIN" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "WRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +/* *INDENT-ON* */ + +/* justification-flags */ +#undef N +#undef i +#undef h +#undef c +#undef x + +#undef T + +/* tag-names */ +#undef A +#undef ABBR +#undef ACRONYM +#undef ADDRESS +#undef APPLET +#undef AREA +#undef AU +#undef AUTHOR +#undef B +#undef BANNER +#undef BASE +#undef BASEFONT +#undef BDO +#undef BGSOUND +#undef BIG +#undef BLINK +#undef BLOCKQUOTE +#undef BODY +#undef BODYTEXT +#undef BQ +#undef BR +#undef BUTTON +#undef CAPTION +#undef CENTER +#undef CITE +#undef CODE +#undef COL +#undef COLGROUP +#undef COMMENT +#undef CREDIT +#undef DD +#undef DEL +#undef DFN +#undef DIR +#undef DIV +#undef DL +#undef DLC +#undef DT +#undef EM +#undef EMBED +#undef FIELDSET +#undef FIG +#undef FN +#undef FONT +#undef FORM +#undef FRAME +#undef FRAMESET +#undef H1 +#undef H2 +#undef H3 +#undef H4 +#undef H5 +#undef H6 +#undef HEAD +#undef HR +#undef HTML +#undef HY +#undef I +#undef IFRAME +#undef IMG +#undef INPUT +#undef INS +#undef ISINDEX +#undef KBD +#undef KEYGEN +#undef LABEL +#undef LEGEND +#undef LH +#undef LI +#undef LINK +#undef LISTING +#undef MAP +#undef MARQUEE +#undef MATH +#undef MENU +#undef META +#undef NEXTID +#undef NOFRAMES +#undef NOTE +#undef OBJECT +#undef OL +#undef OPTION +#undef OVERLAY +#undef P +#undef PARAM +#undef PLAINTEXT +#undef PRE +#undef Q +#undef S +#undef SAMP +#undef SCRIPT +#undef SELECT +#undef SHY +#undef SMALL +#undef SPAN +#undef SPOT +#undef STRIKE +#undef STRONG +#undef STYLE +#undef SUB +#undef SUP +#undef TAB +#undef 
TABLE +#undef TBODY +#undef TD +#undef TEXTAREA +#undef TEXTFLOW +#undef TFOOT +#undef TH +#undef THEAD +#undef TITLE +#undef TR +#undef TT +#undef U +#undef UL +#undef VAR +#undef WBR +#undef XMP +#undef OBJECT_PCDATA + +/* these definitions are used in the tags-tables */ +#undef P +#undef P_ +#ifdef USE_COLOR_STYLE +#define P_(x) #x, (sizeof #x) -1 +#define NULL_HTTag_ NULL, 0 +#else +#define P_(x) #x +#define NULL_HTTag_ NULL +#endif + +#ifdef USE_JUSTIFY_ELTS +#define P(x) P_(x), 1 +#define P0(x) P_(x), 0 +#define NULL_HTTag NULL_HTTag_,0 +#else +#define P(x) P_(x) +#define P0(x) P_(x) +#define NULL_HTTag NULL_HTTag_ +#endif + +#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type + +#endif /* once_HTMLDTD */ +/* *INDENT-OFF* */ +static const HTTag tags_table0[HTML_ALL_ELEMENTS] = { + { P(A), ATTR_DATA(A), SGML_EMPTY, T_A}, + { P(ABBR), ATTR_DATA(GEN), SGML_MIXED, T_ABBR}, + { P(ACRONYM), ATTR_DATA(GEN), SGML_MIXED, T_ACRONYM}, + { P(ADDRESS), ATTR_DATA(ADDRESS), SGML_MIXED, T_ADDRESS}, + { P(APPLET), ATTR_DATA(APPLET), SGML_MIXED, T_APPLET}, + { P(AREA), ATTR_DATA(AREA), SGML_EMPTY, T_AREA}, + { P(AU), ATTR_DATA(GEN), SGML_MIXED, T_AU}, + { P(AUTHOR), ATTR_DATA(GEN), SGML_MIXED, T_AUTHOR}, + { P(B), ATTR_DATA(GEN), SGML_EMPTY, T_B}, + { P0(BANNER), ATTR_DATA(GEN), SGML_MIXED, T_BANNER}, + { P(BASE), ATTR_DATA(BASE), SGML_EMPTY, T_BASE}, + { P(BASEFONT), ATTR_DATA(FONT), SGML_EMPTY, T_BASEFONT}, + { P(BDO), ATTR_DATA(GEN), SGML_MIXED, T_BDO}, + { P(BGSOUND), ATTR_DATA(BGSOUND), SGML_EMPTY, T_BGSOUND}, + { P(BIG), ATTR_DATA(GEN), SGML_MIXED, T_BIG}, + { P(BLINK), ATTR_DATA(GEN), SGML_EMPTY, T_BLINK}, + { P(BLOCKQUOTE), ATTR_DATA(BQ), SGML_MIXED, T_BLOCKQUOTE}, + { P(BODY), ATTR_DATA(BODY), SGML_MIXED, T_BODY}, + { P(BODYTEXT), ATTR_DATA(BODYTEXT), SGML_MIXED, T_BODYTEXT}, + { P(BQ), ATTR_DATA(BQ), SGML_MIXED, T_BQ}, + { P(BR), ATTR_DATA(GEN), SGML_EMPTY, T_BR}, + { P(BUTTON), ATTR_DATA(BUTTON), SGML_MIXED, T_BUTTON}, + { P(CAPTION), 
ATTR_DATA(CAPTION), SGML_MIXED, T_CAPTION}, + { P(CENTER), ATTR_DATA(DIV), SGML_MIXED, T_CENTER}, + { P(CITE), ATTR_DATA(GEN), SGML_EMPTY, T_CITE}, + { P(CODE), ATTR_DATA(GEN), SGML_MIXED, T_CODE}, + { P(COL), ATTR_DATA(COL), SGML_EMPTY, T_COL}, + { P(COLGROUP), ATTR_DATA(COL), SGML_EMPTY, T_COLGROUP}, + { P(COMMENT), ATTR_DATA(GEN), SGML_MIXED, T_COMMENT}, + { P(CREDIT), ATTR_DATA(GEN), SGML_MIXED, T_CREDIT}, + { P(DD), ATTR_DATA(GEN), SGML_EMPTY, T_DD}, + { P(DEL), ATTR_DATA(DEL), SGML_MIXED, T_DEL}, + { P(DFN), ATTR_DATA(GEN), SGML_MIXED, T_DFN}, + { P(DIR), ATTR_DATA(UL), SGML_MIXED, T_DIR}, + { P(DIV), ATTR_DATA(DIV), SGML_MIXED, T_DIV}, + { P(DL), ATTR_DATA(DL), SGML_MIXED, T_DL}, + { P(DLC), ATTR_DATA(DL), SGML_MIXED, T_DLC}, + { P(DT), ATTR_DATA(GEN), SGML_EMPTY, T_DT}, + { P(EM), ATTR_DATA(GEN), SGML_EMPTY, T_EM}, + { P(EMBED), ATTR_DATA(EMBED), SGML_EMPTY, T_EMBED}, + { P(FIELDSET), ATTR_DATA(GEN), SGML_MIXED, T_FIELDSET}, + { P(FIG), ATTR_DATA(FIG), SGML_MIXED, T_FIG}, + { P(FN), ATTR_DATA(GEN), SGML_MIXED, T_FN}, + { P(FONT), ATTR_DATA(FONT), SGML_EMPTY, T_FONT}, + { P(FORM), ATTR_DATA(FORM), SGML_EMPTY, T_FORM}, + { P(FRAME), ATTR_DATA(FRAME), SGML_EMPTY, T_FRAME}, + { P(FRAMESET), ATTR_DATA(FRAMESET), SGML_MIXED, T_FRAMESET}, + { P0(H1), ATTR_DATA(H), SGML_MIXED, T_H1}, + { P0(H2), ATTR_DATA(H), SGML_MIXED, T_H2}, + { P0(H3), ATTR_DATA(H), SGML_MIXED, T_H3}, + { P0(H4), ATTR_DATA(H), SGML_MIXED, T_H4}, + { P0(H5), ATTR_DATA(H), SGML_MIXED, T_H5}, + { P0(H6), ATTR_DATA(H), SGML_MIXED, T_H6}, + { P(HEAD), ATTR_DATA(GEN), SGML_MIXED, T_HEAD}, + { P(HR), ATTR_DATA(HR), SGML_EMPTY, T_HR}, + { P(HTML), ATTR_DATA(GEN), SGML_MIXED, T_HTML}, + { P(HY), ATTR_DATA(GEN), SGML_EMPTY, T_HY}, + { P(I), ATTR_DATA(GEN), SGML_EMPTY, T_I}, + { P(IFRAME), ATTR_DATA(IFRAME), SGML_MIXED, T_IFRAME}, + { P(IMG), ATTR_DATA(IMG), SGML_EMPTY, T_IMG}, + { P(INPUT), ATTR_DATA(INPUT), SGML_EMPTY, T_INPUT}, + { P(INS), ATTR_DATA(DEL), SGML_MIXED, T_INS}, + { P(ISINDEX), 
ATTR_DATA(ISINDEX), SGML_EMPTY, T_ISINDEX}, + { P(KBD), ATTR_DATA(GEN), SGML_MIXED, T_KBD}, + { P(KEYGEN), ATTR_DATA(KEYGEN), SGML_EMPTY, T_KEYGEN}, + { P(LABEL), ATTR_DATA(LABEL), SGML_MIXED, T_LABEL}, + { P(LEGEND), ATTR_DATA(CAPTION), SGML_MIXED, T_LEGEND}, + { P(LH), ATTR_DATA(GEN), SGML_EMPTY, T_LH}, + { P(LI), ATTR_DATA(LI), SGML_EMPTY, T_LI}, + { P(LINK), ATTR_DATA(LINK), SGML_EMPTY, T_LINK}, + { P(LISTING), ATTR_DATA(GEN), SGML_LITTERAL,T_LISTING}, + { P(MAP), ATTR_DATA(MAP), SGML_MIXED, T_MAP}, + { P(MARQUEE), ATTR_DATA(GEN), SGML_MIXED, T_MARQUEE}, + { P(MATH), ATTR_DATA(MATH), SGML_LITTERAL,T_MATH}, + { P(MENU), ATTR_DATA(UL), SGML_MIXED, T_MENU}, + { P(META), ATTR_DATA(META), SGML_EMPTY, T_META}, + { P(NEXTID), ATTR_DATA(NEXTID), SGML_EMPTY, T_NEXTID}, + { P(NOFRAMES), ATTR_DATA(GEN), SGML_MIXED, T_NOFRAMES}, + { P(NOTE), ATTR_DATA(NOTE), SGML_MIXED, T_NOTE}, + { P(OBJECT), ATTR_DATA(OBJECT), SGML_LITTERAL,T_OBJECT}, + { P(OL), ATTR_DATA(OL), SGML_MIXED, T_OL}, + { P(OPTION), ATTR_DATA(OPTION), SGML_EMPTY, T_OPTION}, + { P(OVERLAY), ATTR_DATA(OVERLAY), SGML_EMPTY, T_OVERLAY}, + { P(P), ATTR_DATA(P), SGML_EMPTY, T_P}, + { P(PARAM), ATTR_DATA(PARAM), SGML_EMPTY, T_PARAM}, + { P(PLAINTEXT), ATTR_DATA(GEN), SGML_LITTERAL,T_PLAINTEXT}, + { P0(PRE), ATTR_DATA(GEN), SGML_MIXED, T_PRE}, + { P(Q), ATTR_DATA(Q), SGML_MIXED, T_Q}, + { P(S), ATTR_DATA(GEN), SGML_MIXED, T_S}, + { P(SAMP), ATTR_DATA(GEN), SGML_MIXED, T_SAMP}, + { P(SCRIPT), ATTR_DATA(SCRIPT), SGML_LITTERAL,T_SCRIPT}, + { P(SELECT), ATTR_DATA(SELECT), SGML_MIXED, T_SELECT}, + { P(SHY), ATTR_DATA(GEN), SGML_EMPTY, T_SHY}, + { P(SMALL), ATTR_DATA(GEN), SGML_MIXED, T_SMALL}, + { P(SPAN), ATTR_DATA(GEN), SGML_MIXED, T_SPAN}, + { P(SPOT), ATTR_DATA(GEN), SGML_EMPTY, T_SPOT}, + { P(STRIKE), ATTR_DATA(GEN), SGML_MIXED, T_STRIKE}, + { P(STRONG), ATTR_DATA(GEN), SGML_EMPTY, T_STRONG}, + { P(STYLE), ATTR_DATA(STYLE), SGML_LITTERAL,T_STYLE}, + { P(SUB), ATTR_DATA(GEN), SGML_MIXED, T_SUB}, + { P(SUP), 
ATTR_DATA(GEN), SGML_MIXED, T_SUP}, + { P(TAB), ATTR_DATA(TAB), SGML_EMPTY, T_TAB}, + { P(TABLE), ATTR_DATA(TABLE), SGML_MIXED, T_TABLE}, + { P(TBODY), ATTR_DATA(TR), SGML_EMPTY, T_TBODY}, + { P(TD), ATTR_DATA(TD), SGML_EMPTY, T_TD}, + { P(TEXTAREA), ATTR_DATA(TEXTAREA), SGML_LITTERAL,T_TEXTAREA}, + { P(TEXTFLOW), ATTR_DATA(BODYTEXT), SGML_MIXED, T_TEXTFLOW}, + { P(TFOOT), ATTR_DATA(TR), SGML_EMPTY, T_TFOOT}, + { P(TH), ATTR_DATA(TD), SGML_EMPTY, T_TH}, + { P(THEAD), ATTR_DATA(TR), SGML_EMPTY, T_THEAD}, + { P(TITLE), ATTR_DATA(GEN), SGML_RCDATA, T_TITLE}, + { P(TR), ATTR_DATA(TR), SGML_EMPTY, T_TR}, + { P(TT), ATTR_DATA(GEN), SGML_EMPTY, T_TT}, + { P(U), ATTR_DATA(GEN), SGML_EMPTY, T_U}, + { P(UL), ATTR_DATA(UL), SGML_MIXED, T_UL}, + { P(VAR), ATTR_DATA(GEN), SGML_MIXED, T_VAR}, + { P(WBR), ATTR_DATA(GEN), SGML_EMPTY, T_WBR}, + { P0(XMP), ATTR_DATA(GEN), SGML_LITTERAL,T_XMP}, +/* additional (alternative variants), not counted in HTML_ELEMENTS: */ +/* This one will be used as a temporary substitute within the parser when + it has been signalled to parse OBJECT content as MIXED. 
- kw */ + { P(OBJECT), ATTR_DATA(OBJECT), SGML_MIXED, T_OBJECT_PCDATA}, +}; +/* *INDENT-ON* */ + +#endif /* src_HTMLDTD_H0 */ diff --git a/WWW/Library/Implementation/src0_HTMLDTD.txt b/WWW/Library/Implementation/src0_HTMLDTD.txt new file mode 100644 index 00000000..87bebeea --- /dev/null +++ b/WWW/Library/Implementation/src0_HTMLDTD.txt @@ -0,0 +1,3664 @@ +59 attr_types + 0:align + 1 attributes: + 0:0:ALIGN + 1:bgcolor + 1 attributes: + 0:0:BGCOLOR + 2:cellalign + 4 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:0:VALIGN + 3:core + 4 attributes: + 0:4:CLASS + 1:1:ID + 2:0:STYLE + 3:0:TITLE + 4:events + 10 attributes: + 0:0:ONCLICK + 1:0:ONDBLCLICK + 2:0:ONKEYDOWN + 3:0:ONKEYPRESS + 4:0:ONKEYUP + 5:0:ONMOUSEDOWN + 6:0:ONMOUSEMOVE + 7:0:ONMOUSEOUT + 8:0:ONMOUSEOVER + 9:0:ONMOUSEUP + 5:i18n + 2 attributes: + 0:0:DIR + 1:0:LANG + 6:A + 19 attributes: + 0:0:ACCESSKEY + 1:0:CHARSET + 2:0:CLEAR + 3:0:COORDS + 4:2:HREF + 5:0:HREFLANG + 6:0:ISMAP + 7:0:MD + 8:1:NAME + 9:0:NOTAB + 10:0:ONBLUR + 11:0:ONFOCUS + 12:0:REL + 13:0:REV + 14:0:SHAPE + 15:0:TABINDEX + 16:0:TARGET + 17:0:TYPE + 18:0:URN + 7:ADDRESS + 2 attributes: + 0:0:CLEAR + 1:0:NOWRAP + 8:APPLET + 10 attributes: + 0:0:ALT + 1:0:CLEAR + 2:0:CODE + 3:2:CODEBASE + 4:0:DOWNLOAD + 5:0:HEIGHT + 6:0:HSPACE + 7:1:NAME + 8:0:VSPACE + 9:0:WIDTH + 9:AREA + 12 attributes: + 0:0:ACCESSKEY + 1:0:ALT + 2:0:CLEAR + 3:0:COORDS + 4:2:HREF + 5:0:NOHREF + 6:0:NOTAB + 7:0:ONBLUR + 8:0:ONFOCUS + 9:0:SHAPE + 10:0:TABINDEX + 11:0:TARGET + 10:BASE + 2 attributes: + 0:2:HREF + 1:0:TARGET + 11:BGSOUND + 3 attributes: + 0:0:CLEAR + 1:0:LOOP + 2:2:SRC + 12:BODY + 8 attributes: + 0:0:ALINK + 1:2:BACKGROUND + 2:0:CLEAR + 3:0:LINK + 4:0:ONLOAD + 5:0:ONUNLOAD + 6:0:TEXT + 7:0:VLINK + 13:BODYTEXT + 8 attributes: + 0:0:CLEAR + 1:0:DATA + 2:0:NAME + 3:0:OBJECT + 4:0:REF + 5:0:TYPE + 6:0:VALUE + 7:0:VALUETYPE + 14:BQ + 3 attributes: + 0:2:CITE + 1:0:CLEAR + 2:0:NOWRAP + 15:BUTTON + 10 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 2:0:DISABLED + 
3:0:NAME + 4:0:ONBLUR + 5:0:ONFOCUS + 6:0:READONLY + 7:0:TABINDEX + 8:0:TYPE + 9:0:VALUE + 16:CAPTION + 2 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 17:COL + 3 attributes: + 0:0:CLEAR + 1:0:SPAN + 2:0:WIDTH + 18:DEL + 2 attributes: + 0:0:CITE + 1:0:DATETIME + 19:DIV + 1 attributes: + 0:0:CLEAR + 20:DL + 2 attributes: + 0:0:CLEAR + 1:0:COMPACT + 21:EMBED + 14 attributes: + 0:0:ALT + 1:0:BORDER + 2:0:CLEAR + 3:0:HEIGHT + 4:0:IMAGEMAP + 5:0:ISMAP + 6:0:MD + 7:1:NAME + 8:0:NOFLOW + 9:0:PARAMS + 10:2:SRC + 11:0:UNITS + 12:0:USEMAP + 13:0:WIDTH + 22:FIG + 10 attributes: + 0:0:BORDER + 1:0:CLEAR + 2:0:HEIGHT + 3:0:IMAGEMAP + 4:0:ISOBJECT + 5:0:MD + 6:0:NOFLOW + 7:2:SRC + 8:0:UNITS + 9:0:WIDTH + 23:FONT + 5 attributes: + 0:0:CLEAR + 1:0:COLOR + 2:0:END + 3:0:FACE + 4:0:SIZE + 24:FORM + 11 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:2:ACTION + 3:0:CLEAR + 4:0:ENCTYPE + 5:0:METHOD + 6:0:ONRESET + 7:0:ONSUBMIT + 8:0:SCRIPT + 9:0:SUBJECT + 10:0:TARGET + 25:FRAME + 8 attributes: + 0:0:FRAMEBORDER + 1:2:LONGDESC + 2:0:MARGINHEIGHT + 3:0:MARGINWIDTH + 4:0:NAME + 5:0:NORESIZE + 6:0:SCROLLING + 7:2:SRC + 26:FRAMESET + 4 attributes: + 0:0:COLS + 1:0:ONLOAD + 2:0:ONUNLOAD + 3:0:ROWS + 27:GEN + 1 attributes: + 0:0:CLEAR + 28:H + 7 attributes: + 0:0:CLEAR + 1:0:DINGBAT + 2:0:MD + 3:0:NOWRAP + 4:0:SEQNUM + 5:0:SKIP + 6:2:SRC + 29:HR + 6 attributes: + 0:0:CLEAR + 1:0:MD + 2:0:NOSHADE + 3:0:SIZE + 4:2:SRC + 5:0:WIDTH + 30:IFRAME + 9 attributes: + 0:0:FRAMEBORDER + 1:0:HEIGHT + 2:2:LONGDESC + 3:0:MARGINHEIGHT + 4:0:MARGINWIDTH + 5:0:NAME + 6:0:SCROLLING + 7:2:SRC + 8:0:WIDTH + 31:IMG + 15 attributes: + 0:0:ALT + 1:0:BORDER + 2:0:CLEAR + 3:0:HEIGHT + 4:0:HSPACE + 5:0:ISMAP + 6:0:ISOBJECT + 7:2:LONGDESC + 8:0:MD + 9:0:NAME + 10:2:SRC + 11:0:UNITS + 12:2:USEMAP + 13:0:VSPACE + 14:0:WIDTH + 32:INPUT + 28 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCESSKEY + 3:0:ALT + 4:0:CHECKED + 5:0:CLEAR + 6:0:DISABLED + 7:0:ERROR + 8:0:HEIGHT + 9:0:ISMAP + 10:0:MAX + 11:0:MAXLENGTH + 12:0:MD 
+ 13:0:MIN + 14:0:NAME + 15:0:NOTAB + 16:0:ONBLUR + 17:0:ONCHANGE + 18:0:ONFOCUS + 19:0:ONSELECT + 20:0:READONLY + 21:0:SIZE + 22:2:SRC + 23:0:TABINDEX + 24:0:TYPE + 25:0:USEMAP + 26:0:VALUE + 27:0:WIDTH + 33:ISINDEX + 3 attributes: + 0:2:ACTION + 1:2:HREF + 2:0:PROMPT + 34:KEYGEN + 2 attributes: + 0:0:CHALLENGE + 1:0:NAME + 35:LABEL + 5 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 2:0:FOR + 3:0:ONBLUR + 4:0:ONFOCUS + 36:LI + 7 attributes: + 0:0:CLEAR + 1:0:DINGBAT + 2:0:MD + 3:0:SKIP + 4:2:SRC + 5:0:TYPE + 6:0:VALUE + 37:LINK + 8 attributes: + 0:0:CHARSET + 1:2:HREF + 2:0:HREFLANG + 3:0:MEDIA + 4:0:REL + 5:0:REV + 6:0:TARGET + 7:0:TYPE + 38:MAP + 2 attributes: + 0:0:CLEAR + 1:1:NAME + 39:MATH + 2 attributes: + 0:0:BOX + 1:0:CLEAR + 40:META + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME + 41:NEXTID + 1 attributes: + 0:0:N + 42:NOTE + 4 attributes: + 0:0:CLEAR + 1:0:MD + 2:8:ROLE + 3:2:SRC + 43:OBJECT + 19 attributes: + 0:0:ARCHIVE + 1:0:BORDER + 2:2:CLASSID + 3:2:CODEBASE + 4:0:CODETYPE + 5:2:DATA + 6:0:DECLARE + 7:0:HEIGHT + 8:0:HSPACE + 9:0:ISMAP + 10:0:NAME + 11:0:NOTAB + 12:0:SHAPES + 13:0:STANDBY + 14:0:TABINDEX + 15:0:TYPE + 16:2:USEMAP + 17:0:VSPACE + 18:0:WIDTH + 44:OL + 6 attributes: + 0:0:CLEAR + 1:0:COMPACT + 2:0:CONTINUE + 3:0:SEQNUM + 4:0:START + 5:0:TYPE + 45:OPTION + 7 attributes: + 0:0:CLEAR + 1:0:DISABLED + 2:0:ERROR + 3:0:LABEL + 4:0:SELECTED + 5:0:SHAPE + 6:0:VALUE + 46:OVERLAY + 8 attributes: + 0:0:HEIGHT + 1:0:IMAGEMAP + 2:0:MD + 3:2:SRC + 4:0:UNITS + 5:0:WIDTH + 6:0:X + 7:0:Y + 47:P + 2 attributes: + 0:0:CLEAR + 1:0:NOWRAP + 48:PARAM + 12 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCEPT-ENCODING + 3:0:CLEAR + 4:0:DATA + 5:0:NAME + 6:0:OBJECT + 7:0:REF + 8:0:TYPE + 9:0:VALUE + 10:0:VALUEREF + 11:0:VALUETYPE + 49:Q + 2 attributes: + 0:2:CITE + 1:0:CLEAR + 50:SCRIPT + 10 attributes: + 0:0:CHARSET + 1:0:CLEAR + 2:0:DEFER + 3:0:EVENT + 4:0:FOR + 5:0:LANGUAGE + 6:0:NAME + 7:0:SCRIPTENGINE + 8:2:SRC + 
9:0:TYPE + 51:SELECT + 15 attributes: + 0:0:CLEAR + 1:0:DISABLED + 2:0:ERROR + 3:0:HEIGHT + 4:0:MD + 5:0:MULTIPLE + 6:0:NAME + 7:0:NOTAB + 8:0:ONBLUR + 9:0:ONCHANGE + 10:0:ONFOCUS + 11:0:SIZE + 12:0:TABINDEX + 13:0:UNITS + 14:0:WIDTH + 52:STYLE + 3 attributes: + 0:0:MEDIA + 1:0:NOTATION + 2:0:TYPE + 53:TAB + 4 attributes: + 0:0:CLEAR + 1:0:DP + 2:0:INDENT + 3:0:TO + 54:TABLE + 15 attributes: + 0:2:BACKGROUND + 1:0:BORDER + 2:0:CELLPADDING + 3:0:CELLSPACING + 4:0:CLEAR + 5:0:COLS + 6:0:COLSPEC + 7:0:DP + 8:0:FRAME + 9:0:NOFLOW + 10:0:NOWRAP + 11:0:RULES + 12:0:SUMMARY + 13:0:UNITS + 14:0:WIDTH + 55:TD + 13 attributes: + 0:0:ABBR + 1:0:AXES + 2:0:AXIS + 3:2:BACKGROUND + 4:0:CLEAR + 5:0:COLSPAN + 6:0:DP + 7:0:HEADERS + 8:0:HEIGHT + 9:0:NOWRAP + 10:0:ROWSPAN + 11:0:SCOPE + 12:0:WIDTH + 56:TEXTAREA + 15 attributes: + 0:0:ACCEPT-CHARSET + 1:0:ACCESSKEY + 2:0:CLEAR + 3:0:COLS + 4:0:DISABLED + 5:0:ERROR + 6:0:NAME + 7:0:NOTAB + 8:0:ONBLUR + 9:0:ONCHANGE + 10:0:ONFOCUS + 11:0:ONSELECT + 12:0:READONLY + 13:0:ROWS + 14:0:TABINDEX + 57:TR + 3 attributes: + 0:0:CLEAR + 1:0:DP + 2:0:NOWRAP + 58:UL + 8 attributes: + 0:0:CLEAR + 1:0:COMPACT + 2:0:DINGBAT + 3:0:MD + 4:0:PLAIN + 5:2:SRC + 6:0:TYPE + 7:0:WRAP +118 tags + 0:A + justify + 25 attributes: + 0:0:ACCESSKEY + 1:0:CHARSET + 2:4:CLASS + 3:0:CLEAR + 4:0:COORDS + 5:0:DIR + 6:2:HREF + 7:0:HREFLANG + 8:1:ID + 9:0:ISMAP + 10:0:LANG + 11:0:MD + 12:1:NAME + 13:0:NOTAB + 14:0:ONBLUR + 15:0:ONFOCUS + 16:0:REL + 17:0:REV + 18:0:SHAPE + 19:0:STYLE + 20:0:TABINDEX + 21:0:TARGET + 22:0:TITLE + 23:0:TYPE + 24:0:URN + 4 attr_types + core + events + i18n + A + contents: SGML_EMPTY + tagclass: Alike + contains: FONTlike EMlike MATHlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike formula Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike 
ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike SELECTlike APPLETlike HRlike same + flags: mafse nreie + 1:ABBR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 2:ACRONYM + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 3:ADDRESS + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:NOWRAP + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + ADDRESS + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike Plike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike 
FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike formula Plike DIVlike same + flags: + 4:APPLET + justify + 17 attributes: + 0:0:ALIGN + 1:0:ALT + 2:4:CLASS + 3:0:CLEAR + 4:0:CODE + 5:2:CODEBASE + 6:0:DIR + 7:0:DOWNLOAD + 8:0:HEIGHT + 9:0:HSPACE + 10:1:ID + 11:0:LANG + 12:1:NAME + 13:0:STYLE + 14:0:TITLE + 15:0:VSPACE + 16:0:WIDTH + 4 attr_types + align + core + i18n + APPLET + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike BRlike APPLETlike same + flags: + 5:AREA + justify + 18 attributes: + 0:0:ACCESSKEY + 1:0:ALT + 2:4:CLASS + 3:0:CLEAR + 4:0:COORDS + 5:0:DIR + 6:2:HREF + 7:1:ID + 8:0:LANG + 9:0:NOHREF + 10:0:NOTAB + 11:0:ONBLUR + 12:0:ONFOCUS + 13:0:SHAPE + 14:0:STYLE + 15:0:TABINDEX + 16:0:TARGET + 17:0:TITLE + 4 attr_types + core + events + i18n + AREA + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: MAPlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike ULlike + flags: endO + 6:AU + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG 
+ 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 7:AUTHOR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 8:B + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 9:BANNER + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: outer BODYlike + icontained: outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 10:BASE + justify + 6 attributes: + 0:4:CLASS + 1:2:HREF + 2:1:ID + 3:0:STYLE + 4:0:TARGET + 5:0:TITLE + 2 attr_types + core + BASE + contents: SGML_EMPTY + tagclass: HEADstuff + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike same + flags: endO + 11:BASEFONT + justify + 11 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COLOR + 3:0:DIR + 4:0:END + 5:0:FACE + 6:1:ID + 7:0:LANG + 8:0:SIZE + 9:0:STYLE + 10:0:TITLE + 3 attr_types + core + i18n + FONT + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: BRlike APPLETlike HRlike MAPlike same + flags: endO + 12:BDO + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike 
BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike Plike DIVlike + flags: + 13:BGSOUND + justify + 9 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:LOOP + 6:2:SRC + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BGSOUND + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike Plike DIVlike BRlike APPLETlike HRlike same + flags: endO + 14:BIG + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 15:BLINK + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike 
Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 16:BLOCKQUOTE + justify + 9 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BQ + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 17:BODY + justify + 15 attributes: + 0:0:ALINK + 1:2:BACKGROUND + 2:0:BGCOLOR + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:LINK + 9:0:ONLOAD + 10:0:ONUNLOAD + 11:0:STYLE + 12:0:TEXT + 13:0:TITLE + 14:0:VLINK + 4 attr_types + bgcolor + core + i18n + BODY + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + contained: outer BODYlike + icontained: outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer HEADstuff same + flags: endO 
startO + 18:BODYTEXT + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DATA + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NAME + 7:0:OBJECT + 8:0:REF + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 13:0:VALUETYPE + 3 attr_types + core + i18n + BODYTEXT + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: DIVlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike BRlike APPLETlike HRlike MAPlike same + flags: endO startO + 19:BQ + justify + 9 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BQ + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 20:BR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + 
icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 21:BUTTON + justify + 16 attributes: + 0:0:ACCESSKEY + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DISABLED + 5:1:ID + 6:0:LANG + 7:0:NAME + 8:0:ONBLUR + 9:0:ONFOCUS + 10:0:READONLY + 11:0:STYLE + 12:0:TABINDEX + 13:0:TITLE + 14:0:TYPE + 15:0:VALUE + 4 attr_types + core + events + i18n + BUTTON + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike formula TRlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike BRlike same + flags: + 22:CAPTION + justify + 9 attributes: + 0:0:ACCESSKEY + 1:0:ALIGN + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:STYLE + 8:0:TITLE + 5 attr_types + align + core + events + i18n + CAPTION + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike ULlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike same + flags: + 23:CENTER + justify + 8 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + align + core + i18n + DIV + contents: SGML_MIXED + 
tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: + 24:CITE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: EMlike + flags: nreie + 25:CODE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + 
canclose: EMlike + flags: + 26:COL + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:SPAN + 9:0:STYLE + 10:0:TITLE + 11:0:VALIGN + 12:0:WIDTH + 5 attr_types + cellalign + core + events + i18n + COL + contents: SGML_EMPTY + tagclass: HRlike + contains: + icontains: + contained: TRlike ULlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: endO + 27:COLGROUP + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:SPAN + 9:0:STYLE + 10:0:TITLE + 11:0:VALIGN + 12:0:WIDTH + 5 attr_types + cellalign + core + events + i18n + COL + contents: SGML_EMPTY + tagclass: TRlike + contains: HRlike + icontains: HRlike + contained: ULlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike MAPlike same + flags: endO + 28:COMMENT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: MATHlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + canclose: FONTlike EMlike + flags: + 29:CREDIT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike 
APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike ULlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike Plike DIVlike same + flags: + 30:DD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 31:DEL + justify + 8 attributes: + 0:0:CITE + 1:4:CLASS + 2:0:DATETIME + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + core + events + i18n + DEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 32:DFN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID 
+ 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 33:DIR + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike HRlike same + flags: + 34:DIV + justify + 8 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + align + core + i18n + DIV + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike 
MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: mafse + 35:DL + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + DL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike + flags: + 36:DLC + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + DL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike + flags: + 37:DT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike BRlike APPLETlike MAPlike + contained: ULlike + 
icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 38:EM + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: nreie + 39:EMBED + justify + 21 attributes: + 0:0:ALIGN + 1:0:ALT + 2:0:BORDER + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:HEIGHT + 7:1:ID + 8:0:IMAGEMAP + 9:0:ISMAP + 10:0:LANG + 11:0:MD + 12:1:NAME + 13:0:NOFLOW + 14:0:PARAMS + 15:2:SRC + 16:0:STYLE + 17:0:TITLE + 18:0:UNITS + 19:0:USEMAP + 20:0:WIDTH + 4 attr_types + align + core + i18n + EMBED + contents: SGML_EMPTY + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Plike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + flags: endO + 
40:FIELDSET + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike MAPlike same + flags: + 41:FIG + justify + 17 attributes: + 0:0:ALIGN + 1:0:BORDER + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:0:HEIGHT + 6:1:ID + 7:0:IMAGEMAP + 8:0:ISOBJECT + 9:0:LANG + 10:0:MD + 11:0:NOFLOW + 12:2:SRC + 13:0:STYLE + 14:0:TITLE + 15:0:UNITS + 16:0:WIDTH + 4 attr_types + align + core + i18n + FIG + contents: SGML_MIXED + tagclass: DIVlike + contains: Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike SELECTlike Plike DIVlike MAPlike same + flags: + 42:FN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike 
SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike SELECTlike Plike BRlike same + flags: + 43:FONT + justify + 11 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COLOR + 3:0:DIR + 4:0:END + 5:0:FACE + 6:1:ID + 7:0:LANG + 8:0:SIZE + 9:0:STYLE + 10:0:TITLE + 3 attr_types + core + i18n + FONT + contents: SGML_EMPTY + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 44:FORM + justify + 17 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:2:ACTION + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:ENCTYPE + 7:1:ID + 8:0:LANG + 9:0:METHOD + 10:0:ONRESET + 11:0:ONSUBMIT + 12:0:SCRIPT + 13:0:STYLE + 14:0:SUBJECT + 15:0:TARGET + 16:0:TITLE + 3 attr_types + core + i18n + FORM + contents: SGML_EMPTY + tagclass: FORMlike + contains: FONTlike EMlike MATHlike Alike TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Plike DIVlike LIlike ULlike APPLETlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula 
TRlike SELECTlike FORMlike Plike LIlike ULlike MAPlike same + flags: + 45:FRAME + justify + 12 attributes: + 0:4:CLASS + 1:0:FRAMEBORDER + 2:1:ID + 3:2:LONGDESC + 4:0:MARGINHEIGHT + 5:0:MARGINWIDTH + 6:0:NAME + 7:0:NORESIZE + 8:0:SCROLLING + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 2 attr_types + core + FRAME + contents: SGML_EMPTY + tagclass: outer + contains: + icontains: + contained: outer + icontained: outer + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: endO + 46:FRAMESET + justify + 4 attributes: + 0:0:COLS + 1:0:ONLOAD + 2:0:ONUNLOAD + 3:0:ROWS + 1 attr_types + FRAMESET + contents: SGML_MIXED + tagclass: outer + contains: outer same + icontains: outer same + contained: outer same + icontained: BRlike APPLETlike outer same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: + 47:H1 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 48:H2 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: 
SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 49:H3 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 50:H4 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 51:H5 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT 
+ 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 52:H6 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 53:HEAD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: HEADstuff + contains: BRlike APPLETlike HRlike MAPlike HEADstuff + icontains: BRlike APPLETlike HRlike HEADstuff + contained: outer + icontained: outer + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: endO startO mafse + 54:HR + justify + 13 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:MD + 7:0:NOSHADE 
+ 8:0:SIZE + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:WIDTH + 4 attr_types + align + core + i18n + HR + contents: SGML_EMPTY + tagclass: HRlike + contains: + icontains: + contained: FORMlike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula TRlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike same + flags: endO + 55:HTML + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: outer + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + contained: + icontained: + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + flags: endO startO + 56:HY + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 57:I + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: 
FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 58:IFRAME + justify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:FRAMEBORDER + 3:0:HEIGHT + 4:1:ID + 5:2:LONGDESC + 6:0:MARGINHEIGHT + 7:0:MARGINWIDTH + 8:0:NAME + 9:0:SCROLLING + 10:2:SRC + 11:0:STYLE + 12:0:TITLE + 13:0:WIDTH + 3 attr_types + align + core + IFRAME + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike BRlike APPLETlike outer HEADstuff same + flags: + 59:IMG + justify + 22 attributes: + 0:0:ALIGN + 1:0:ALT + 2:0:BORDER + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:HEIGHT + 7:0:HSPACE + 8:1:ID + 9:0:ISMAP + 10:0:ISOBJECT + 11:0:LANG + 12:2:LONGDESC + 13:0:MD + 14:0:NAME + 15:2:SRC + 16:0:STYLE + 17:0:TITLE + 18:0:UNITS + 19:2:USEMAP + 20:0:VSPACE + 21:0:WIDTH + 5 attr_types + align + core + events + i18n + IMG + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike 
APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: same + flags: endO + 60:INPUT + justify + 35 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCESSKEY + 3:0:ALIGN + 4:0:ALT + 5:0:CHECKED + 6:4:CLASS + 7:0:CLEAR + 8:0:DIR + 9:0:DISABLED + 10:0:ERROR + 11:0:HEIGHT + 12:1:ID + 13:0:ISMAP + 14:0:LANG + 15:0:MAX + 16:0:MAXLENGTH + 17:0:MD + 18:0:MIN + 19:0:NAME + 20:0:NOTAB + 21:0:ONBLUR + 22:0:ONCHANGE + 23:0:ONFOCUS + 24:0:ONSELECT + 25:0:READONLY + 26:0:SIZE + 27:2:SRC + 28:0:STYLE + 29:0:TABINDEX + 30:0:TITLE + 31:0:TYPE + 32:0:USEMAP + 33:0:VALUE + 34:0:WIDTH + 5 attr_types + align + core + events + i18n + INPUT + contents: SGML_EMPTY + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike SELECTlike BRlike MAPlike same + flags: endO + 61:INS + justify + 8 attributes: + 0:0:CITE + 1:4:CLASS + 2:0:DATETIME + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + core + events + i18n + DEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 62:ISINDEX + justify + 9 attributes: + 0:2:ACTION + 1:4:CLASS + 2:0:DIR + 3:2:HREF + 
4:1:ID + 5:0:LANG + 6:0:PROMPT + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + ISINDEX + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike same + flags: endO + 63:KBD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike + flags: + 64:KEYGEN + justify + 8 attributes: + 0:0:CHALLENGE + 1:4:CLASS + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:NAME + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + KEYGEN + contents: SGML_EMPTY + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: formula TRlike SELECTlike same + flags: endO + 65:LABEL + justify + 11 attributes: + 0:0:ACCESSKEY + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:FOR + 5:1:ID + 6:0:LANG + 7:0:ONBLUR + 8:0:ONFOCUS + 9:0:STYLE + 10:0:TITLE + 4 attr_types + core + events + i18n + LABEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike 
APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike + flags: + 66:LEGEND + justify + 9 attributes: + 0:0:ACCESSKEY + 1:0:ALIGN + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:STYLE + 8:0:TITLE + 5 attr_types + align + core + events + i18n + CAPTION + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike + flags: + 67:LH + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 68:LI + justify + 13 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DINGBAT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:MD + 7:0:SKIP + 8:2:SRC + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 4 attr_types + core + events + i18n + LI + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike 
FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 69:LINK + justify + 14 attributes: + 0:0:CHARSET + 1:4:CLASS + 2:0:DIR + 3:2:HREF + 4:0:HREFLANG + 5:1:ID + 6:0:LANG + 7:0:MEDIA + 8:0:REL + 9:0:REV + 10:0:STYLE + 11:0:TARGET + 12:0:TITLE + 13:0:TYPE + 4 attr_types + core + events + i18n + LINK + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + flags: endO + 70:LISTING + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: ULlike + contains: + icontains: + contained: DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike ULlike same + flags: + 71:MAP + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:1:NAME + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + MAP + contents: SGML_MIXED + tagclass: MAPlike + contains: MAPlike + icontains: MAPlike + contained: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula 
Plike LIlike + flags: + 72:MARQUEE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: HRlike + contains: FONTlike EMlike MATHlike Alike + icontains: FONTlike EMlike MATHlike Alike formula BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: MATHlike Alike formula BRlike APPLETlike same + flags: + 73:MATH + justify + 8 attributes: + 0:0:BOX + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + MATH + contents: SGML_LITTERAL + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike MATHlike Alike formula + flags: + 74:MENU + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike 
DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: + 75:META + justify + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME + 1 attr_types + META + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + flags: endO + 76:NEXTID + justify + 1 attributes: + 0:0:N + 1 attr_types + NEXTID + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: outer HEADstuff + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + canclose: FONTlike + flags: endO + 77:NOFRAMES + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: BRlike APPLETlike HRlike outer + icontained: BRlike APPLETlike HRlike outer + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike + flags: + 78:NOTE + justify + 10 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:MD + 6:8:ROLE + 7:2:SRC + 8:0:STYLE + 9:0:TITLE + 3 attr_types + core + i18n + NOTE + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: formula TRlike FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 79:OBJECT + justify + 26 attributes: + 0:0:ALIGN + 1:0:ARCHIVE + 2:0:BORDER + 3:4:CLASS + 4:2:CLASSID + 5:2:CODEBASE + 6:0:CODETYPE + 7:2:DATA + 8:0:DECLARE + 9:0:DIR + 10:0:HEIGHT + 11:0:HSPACE + 12:1:ID + 13:0:ISMAP + 14:0:LANG + 15:0:NAME + 16:0:NOTAB + 17:0:SHAPES + 18:0:STANDBY + 19:0:STYLE + 20:0:TABINDEX + 21:0:TITLE + 22:0:TYPE + 23:2:USEMAP + 24:0:VSPACE + 25:0:WIDTH + 5 attr_types + align + core + events + i18n + OBJECT + contents: SGML_LITTERAL + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike BRlike APPLETlike same + flags: frecyc + 80:OL + justify + 12 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:CONTINUE + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:SEQNUM + 8:0:START + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 3 attr_types + core + i18n + OL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike BRlike 
APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: + 81:OPTION + justify + 13 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:0:DISABLED + 4:0:ERROR + 5:1:ID + 6:0:LABEL + 7:0:LANG + 8:0:SELECTED + 9:0:SHAPE + 10:0:STYLE + 11:0:TITLE + 12:0:VALUE + 4 attr_types + core + events + i18n + OPTION + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: SELECTlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: endO + 82:OVERLAY + justify + 12 attributes: + 0:4:CLASS + 1:0:HEIGHT + 2:1:ID + 3:0:IMAGEMAP + 4:0:MD + 5:2:SRC + 6:0:STYLE + 7:0:TITLE + 8:0:UNITS + 9:0:WIDTH + 10:0:X + 11:0:Y + 2 attr_types + core + OVERLAY + contents: SGML_EMPTY + tagclass: HRlike + contains: + icontains: + contained: DIVlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike same + flags: endO + 83:P + justify + 9 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 4 attr_types + align + core + i18n + P + contents: SGML_EMPTY + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike 
DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike formula Plike same + flags: endO + 84:PARAM + justify + 18 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCEPT-ENCODING + 3:4:CLASS + 4:0:CLEAR + 5:0:DATA + 6:0:DIR + 7:1:ID + 8:0:LANG + 9:0:NAME + 10:0:OBJECT + 11:0:REF + 12:0:STYLE + 13:0:TITLE + 14:0:TYPE + 15:0:VALUE + 16:0:VALUEREF + 17:0:VALUETYPE + 3 attr_types + core + i18n + PARAM + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: Plike LIlike BRlike APPLETlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: TRlike SELECTlike Plike LIlike BRlike same + flags: endO + 85:PLAINTEXT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: outer + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff same + contained: outer same + icontained: outer same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + flags: endO + 86:PRE + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike HRlike MAPlike + icontains: EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + 
icontained: formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: + 87:Q + justify + 8 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + Q + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 88:S + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 89:SAMP + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: EMlike + flags: nreie + 90:SCRIPT + justify + 16 attributes: + 0:0:CHARSET + 1:4:CLASS + 2:0:CLEAR + 3:0:DEFER + 4:0:DIR + 5:0:EVENT + 6:0:FOR + 7:1:ID + 8:0:LANG + 9:0:LANGUAGE + 10:0:NAME + 11:0:SCRIPTENGINE + 12:2:SRC + 13:0:STYLE + 14:0:TITLE + 15:0:TYPE + 3 attr_types + core + i18n + SCRIPT + contents: SGML_LITTERAL + tagclass: APPLETlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike BRlike APPLETlike HRlike same + flags: + 91:SELECT + justify + 22 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DISABLED + 5:0:ERROR + 6:0:HEIGHT + 7:1:ID + 8:0:LANG + 9:0:MD + 10:0:MULTIPLE + 11:0:NAME + 12:0:NOTAB + 13:0:ONBLUR + 14:0:ONCHANGE + 15:0:ONFOCUS + 16:0:SIZE + 17:0:STYLE + 18:0:TABINDEX + 19:0:TITLE + 20:0:UNITS + 21:0:WIDTH + 4 attr_types + align + core + i18n + SELECT + contents: SGML_MIXED + tagclass: SELECTlike + contains: MAPlike + icontains: MAPlike + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike same + flags: strict + 92:SHY + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 
4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 93:SMALL + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 94:SPAN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike same + flags: + 95:SPOT + justify + 7 
attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: Alike + contains: + icontains: + contained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: Alike + flags: endO + 96:STRIKE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 97:STRONG + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: nreie + 
98:STYLE + justify + 9 attributes: + 0:4:CLASS + 1:0:DIR + 2:1:ID + 3:0:LANG + 4:0:MEDIA + 5:0:NOTATION + 6:0:STYLE + 7:0:TITLE + 8:0:TYPE + 3 attr_types + core + i18n + STYLE + contents: SGML_LITTERAL + tagclass: HEADstuff + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula same + flags: + 99:SUB + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike MATHlike + flags: + 100:SUP + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike 
HEADstuff same + canclose: FONTlike EMlike MATHlike + flags: + 101:TAB + justify + 11 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DP + 5:1:ID + 6:0:INDENT + 7:0:LANG + 8:0:STYLE + 9:0:TITLE + 10:0:TO + 4 attr_types + align + core + i18n + TAB + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer HEADstuff + canclose: FONTlike + flags: endO + 102:TABLE + justify + 22 attributes: + 0:0:ALIGN + 1:2:BACKGROUND + 2:0:BORDER + 3:0:CELLPADDING + 4:0:CELLSPACING + 5:4:CLASS + 6:0:CLEAR + 7:0:COLS + 8:0:COLSPEC + 9:0:DIR + 10:0:DP + 11:0:FRAME + 12:1:ID + 13:0:LANG + 14:0:NOFLOW + 15:0:NOWRAP + 16:0:RULES + 17:0:STYLE + 18:0:SUMMARY + 19:0:TITLE + 20:0:UNITS + 21:0:WIDTH + 5 attr_types + align + core + events + i18n + TABLE + contents: SGML_MIXED + tagclass: ULlike + contains: TRlike SELECTlike FORMlike Plike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike LIlike HRlike MAPlike same + flags: + 103:TBODY + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_EMPTY + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike 
MAPlike same + contained: FORMlike ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO startO + 104:TD + justify + 23 attributes: + 0:0:ABBR + 1:0:ALIGN + 2:0:AXES + 3:0:AXIS + 4:2:BACKGROUND + 5:0:CHAR + 6:0:CHAROFF + 7:4:CLASS + 8:0:CLEAR + 9:0:COLSPAN + 10:0:DIR + 11:0:DP + 12:0:HEADERS + 13:0:HEIGHT + 14:1:ID + 15:0:LANG + 16:0:NOWRAP + 17:0:ROWSPAN + 18:0:SCOPE + 19:0:STYLE + 20:0:TITLE + 21:0:VALIGN + 22:0:WIDTH + 4 attr_types + cellalign + core + i18n + TD + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO + 105:TEXTAREA + justify + 22 attributes: + 0:0:ACCEPT-CHARSET + 1:0:ACCESSKEY + 2:0:ALIGN + 3:4:CLASS + 4:0:CLEAR + 5:0:COLS + 6:0:DIR + 7:0:DISABLED + 8:0:ERROR + 9:1:ID + 10:0:LANG + 11:0:NAME + 12:0:NOTAB + 13:0:ONBLUR + 14:0:ONCHANGE + 15:0:ONFOCUS + 16:0:ONSELECT + 17:0:READONLY + 18:0:ROWS + 19:0:STYLE + 20:0:TABINDEX + 21:0:TITLE + 5 attr_types + align + core + events + i18n + TEXTAREA + contents: SGML_LITTERAL + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike outer BODYlike + canclose: FONTlike EMlike MATHlike 
Alike formula SELECTlike Plike LIlike ULlike same + flags: nolyspcl + 106:TEXTFLOW + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DATA + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NAME + 7:0:OBJECT + 8:0:REF + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 13:0:VALUETYPE + 3 attr_types + core + i18n + BODYTEXT + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + contained: formula TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer same + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike BRlike APPLETlike MAPlike outer same + flags: endO startO + 107:TFOOT + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_EMPTY + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 108:TH + justify + 23 attributes: + 0:0:ABBR + 1:0:ALIGN + 2:0:AXES + 3:0:AXIS + 4:2:BACKGROUND + 5:0:CHAR + 6:0:CHAROFF + 7:4:CLASS + 8:0:CLEAR + 9:0:COLSPAN + 10:0:DIR + 11:0:DP + 12:0:HEADERS + 13:0:HEIGHT + 14:1:ID + 15:0:LANG + 16:0:NOWRAP + 17:0:ROWSPAN + 18:0:SCOPE + 19:0:STYLE 
+ 20:0:TITLE + 21:0:VALIGN + 22:0:WIDTH + 4 attr_types + cellalign + core + i18n + TD + contents: SGML_EMPTY + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: TRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 109:THEAD + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_EMPTY + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 110:TITLE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_RCDATA + tagclass: HEADstuff + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike + flags: mafse strict + 111:TR + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + 
events + i18n + TR + contents: SGML_EMPTY + tagclass: TRlike + contains: LIlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO + 112:TT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: nreie + 113:U + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 114:UL + justify + 14 
attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike same + flags: + 115:VAR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 116:WBR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike 
MATHlike Alike formula BRlike same + flags: endO + 117:XMP + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: ULlike + contains: + icontains: + contained: TRlike SELECTlike FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike MAPlike + flags: endO diff --git a/WWW/Library/Implementation/src1_HTMLDTD.h b/WWW/Library/Implementation/src1_HTMLDTD.h new file mode 100644 index 00000000..a56e4a09 --- /dev/null +++ b/WWW/Library/Implementation/src1_HTMLDTD.h @@ -0,0 +1,2422 @@ +/* $LynxId: src1_HTMLDTD.h,v 1.45 2011/10/07 00:54:36 Kihara.Hideto Exp $ */ +#ifndef src_HTMLDTD_H1 +#define src_HTMLDTD_H1 1 + +#ifndef once_HTMLDTD +#define once_HTMLDTD 1 + +#define T_A 0x00008,0x0B007,0x0FF17,0x37787,0x77BA7,0x8604F,0x00014 +#define T_ABBR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_ACRONYM 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_ADDRESS 0x00200,0x0F14F,0x8FFFF,0x36680,0xB6FAF,0x80317,0x00000 +#define T_APPLET 0x02000,0x0B0CF,0x8FFFF,0x37F9F,0xB7FBF,0x8300F,0x00000 +#define T_AREA 0x08000,0x00000,0x00000,0x08000,0x3FFFF,0x00F1F,0x00001 +#define T_AU 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_AUTHOR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00003,0x00000 +#define T_B 0x00001,0x8B04F,0xAFFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_BANNER 0x00200,0x0FB8F,0x0FFFF,0x30000,0x30000,0x8031F,0x00000 +#define T_BASE 0x40000,0x00000,0x00000,0x50000,0x50000,0x8000F,0x00001 +#define T_BASEFONT 0x01000,0x00000,0x00000,0x377AF,0x37FAF,0x8F000,0x00001 +#define T_BDO 0x00100,0x0B04F,0x8FFFF,0x36680,0xB6FAF,0x0033F,0x00000 +#define T_BGSOUND 
0x01000,0x00000,0x00000,0x777AF,0x77FAF,0x8730F,0x00001 +#define T_BIG 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_BLINK 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00001,0x00014 +#define T_BLOCKQUOTE 0x00200,0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 +#define T_BODY 0x20000,0x2FB8F,0x2FFFF,0x30000,0x30000,0xDFF7F,0x00003 +#define T_BODYTEXT 0x20000,0x0FB8F,0xAFFFF,0x30200,0xB7FAF,0x8F17F,0x00003 +#define T_BQ 0x00200,0xAFBCF,0xAFFFF,0xB6680,0xB6FAF,0x8031F,0x00000 +#define T_BR 0x01000,0x00000,0x00000,0x377BF,0x77FBF,0x8101F,0x00001 +#define T_BUTTON 0x02000,0x0BB07,0x0FF37,0x0378F,0x37FBF,0x8115F,0x00000 +#define T_CAPTION 0x00100,0x0B04F,0x8FFFF,0x06A00,0xB6FA7,0x8035F,0x00000 +#define T_CENTER 0x00200,0x8FBCF,0x8FFFF,0xB6680,0xB6FA7,0x8071F,0x00000 +#define T_CITE 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00010 +#define T_CODE 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00000 +#define T_COL 0x04000,0x00000,0x00000,0x00820,0x36FA7,0x88F5F,0x00001 +#define T_COLGROUP 0x00020,0x04000,0x04000,0x00800,0x36FA7,0x8875F,0x00001 +#define T_COMMENT 0x00004,0x00000,0x00000,0xA77AF,0x7FFFF,0x00003,0x00000 +#define T_CREDIT 0x00100,0x0B04F,0x8FFFF,0x06A00,0xB7FBF,0x8030F,0x00000 +#define T_DD 0x00400,0x0FBCF,0x8FFFF,0x00800,0xB6FFF,0x8071F,0x00001 +#define T_DEL 0x00002,0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 +#define T_DFN 0x00002,0x8B0CF,0x8FFFF,0x8778F,0xF7FBF,0x00003,0x00000 +#define T_DIR 0x00800,0x0B400,0x0F75F,0x37680,0x36FB7,0x84F7F,0x00000 +#define T_DIV 0x00200,0x8FBCF,0x8FFFF,0xB66A0,0xB7FFF,0x8031F,0x00004 +#define T_DL 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FB7,0x0075F,0x00000 +#define T_DLC 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FB7,0x0075F,0x00000 +#define T_DT 0x00400,0x0B04F,0x0B1FF,0x00800,0x17FFF,0x8071F,0x00001 +#define T_EM 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00010 +#define T_EMBED 0x02000,0x8F107,0x8FFF7,0xB6FBF,0xB7FBF,0x1FF7F,0x00001 +#define T_FIELDSET 
0x00200,0x8FB4F,0x8FF7F,0x86787,0xB7FF7,0x8805F,0x00000 +#define T_FIG 0x00200,0x0FB00,0x8FFFF,0x36680,0xB6FBF,0x8834F,0x00000 +#define T_FN 0x00200,0x8FBCF,0x8FFFF,0xB6680,0xB7EBF,0x8114F,0x00000 +#define T_FONT 0x00001,0x8B04F,0x8FFFF,0xB778F,0xF7FBF,0x00001,0x00014 +#define T_FORM 0x00080,0x0FF6F,0x0FF7F,0x36E07,0x32F07,0x88DFF,0x00000 +#define T_FRAME 0x10000,0x00000,0x00000,0x10000,0x10000,0x9FFFF,0x00001 +#define T_FRAMESET 0x10000,0x90000,0x90000,0x90000,0x93000,0x9FFFF,0x00000 +#define T_H1 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H2 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H3 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H4 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H5 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_H6 0x00100,0x0B04F,0x0B05F,0x36680,0x37FAF,0x80117,0x00000 +#define T_HEAD 0x40000,0x4F000,0x47000,0x10000,0x10000,0x9FF7F,0x00007 +#define T_HR 0x04000,0x00000,0x00000,0x3FE80,0x3FFBF,0x87F37,0x00001 +#define T_HTML 0x10000,0x7FB8F,0x7FFFF,0x00000,0x00000,0x1FFFF,0x00003 +#define T_HY 0x01000,0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 +#define T_I 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_IFRAME 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0xD315F,0x00000 +#define T_IMG 0x01000,0x00000,0x00000,0x3779F,0x37FBF,0x80000,0x00001 +#define T_INPUT 0x00040,0x00000,0x00000,0x03F87,0x37F87,0x8904F,0x00001 +#define T_INS 0x00002,0x8BBCF,0x8FFFF,0xA7F8F,0xF7FBF,0x00003,0x00000 +#define T_ISINDEX 0x08000,0x00000,0x00000,0x7778F,0x7FFAF,0x80007,0x00001 +#define T_KBD 0x00002,0x00000,0x00000,0x2778F,0x77FBF,0x00003,0x00000 +#define T_KEYGEN 0x00040,0x00000,0x00000,0x07FB7,0x37FB7,0x80070,0x00001 +#define T_LABEL 0x00002,0x0304F,0x0FFFF,0x0679F,0x36FBF,0x00007,0x00000 +#define T_LEGEND 0x00002,0x0B04F,0x8FF7F,0x00200,0xB7FA7,0x00003,0x00000 +#define T_LH 0x00400,0x0BB7F,0x8FFFF,0x00800,0x97FFF,0x8071F,0x00001 +#define 
T_LI 0x00400,0x0BBFF,0x8FFFF,0x00800,0x97FFF,0x8071F,0x00001 +#define T_LINK 0x08000,0x00000,0x00000,0x50000,0x50000,0x0FF7F,0x00001 +#define T_LISTING 0x00800,0x00000,0x00000,0x36600,0x36F00,0x80F1F,0x00000 +#define T_MAP 0x08000,0x08000,0x08000,0x37FCF,0x37FBF,0x0051F,0x00000 +#define T_MARQUEE 0x04000,0x0000F,0x8F01F,0x37787,0xB7FA7,0x8301C,0x00000 +#define T_MATH 0x00004,0x0B05F,0x8FFFF,0x2778F,0xF7FBF,0x0001F,0x00000 +#define T_MENU 0x00800,0x0B400,0x0F75F,0x17680,0x36FB7,0x88F7F,0x00000 +#define T_META 0x08000,0x00000,0x00000,0x50000,0x50000,0x0FF7F,0x00001 +#define T_NEXTID 0x01000,0x00000,0x00000,0x50000,0x1FFF7,0x00001,0x00001 +#define T_NOFRAMES 0x20000,0x2FB8F,0x0FFFF,0x17000,0x17000,0x0CF5F,0x00000 +#define T_NOTE 0x00200,0x0BBAF,0x8FFFF,0x376B0,0xB7FFF,0x8031F,0x00000 +#define T_OBJECT 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83D5F,0x00020 +#define T_OL 0x00800,0x0C400,0x8FFFF,0x37680,0xB7FB7,0x88F7F,0x00000 +#define T_OPTION 0x08000,0x00000,0x00000,0x00040,0x37FFF,0x8031F,0x00001 +#define T_OVERLAY 0x04000,0x00000,0x00000,0x00200,0x37FBF,0x83F7F,0x00001 +#define T_P 0x00100,0x0B04F,0x8FFFF,0x36680,0xB6FA7,0x80117,0x00001 +#define T_PARAM 0x01000,0x00000,0x00000,0x33500,0x37FFF,0x81560,0x00001 +#define T_PLAINTEXT 0x10000,0xFFFFF,0xFFFFF,0x90000,0x90000,0x3FFFF,0x00001 +#define T_PRE 0x00200,0x0F04F,0x0F05E,0x36680,0x36FF0,0x8071E,0x00000 +#define T_Q 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00000 +#define T_S 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_SAMP 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00002,0x00010 +#define T_SCRIPT 0x02000,0x00000,0x00000,0x77F9F,0x77FFF,0x87D5F,0x00000 +#define T_SELECT 0x00040,0x08000,0x08000,0x03FAF,0x33FBF,0x80D5F,0x00008 +#define T_SHY 0x01000,0x00000,0x00000,0x3779F,0x77FBF,0x8101F,0x00001 +#define T_SMALL 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_SPAN 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x80003,0x00000 +#define T_SPOT 
0x00008,0x00000,0x00000,0x3FFF7,0x3FFF7,0x00008,0x00001 +#define T_STRIKE 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_STRONG 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FAF,0x00003,0x00010 +#define T_STYLE 0x40000,0x00000,0x00000,0x7638F,0x76FAF,0x8001F,0x00000 +#define T_SUB 0x00004,0x8B05F,0x8FFFF,0x8779F,0xF7FBF,0x00007,0x00000 +#define T_SUP 0x00004,0x8B05F,0x8FFFF,0x8779F,0xF7FBF,0x00007,0x00000 +#define T_TAB 0x01000,0x00000,0x00000,0x3778F,0x57FAF,0x00001,0x00001 +#define T_TABLE 0x00800,0x0F1E0,0x8FFFF,0x36680,0xB6FA7,0x8C57F,0x00000 +#define T_TBODY 0x00020,0x00020,0x8FFFF,0x00880,0xB7FB7,0x8C75F,0x00003 +#define T_TD 0x00400,0x0FBCF,0x8FFFF,0x00020,0xB7FB7,0x8C75F,0x00001 +#define T_TEXTAREA 0x00040,0x00000,0x00000,0x07F8F,0x33FBF,0x80D5F,0x00040 +#define T_TEXTFLOW 0x20000,0x8FBFF,0x9FFFF,0x977B0,0xB7FB7,0x9B00F,0x00003 +#define T_TFOOT 0x00020,0x00020,0x8FFFF,0x00800,0xB7FB7,0x8CF5F,0x00001 +#define T_TH 0x00400,0x0FBCF,0x0FFFF,0x00020,0xB7FB7,0x8CF5F,0x00001 +#define T_THEAD 0x00020,0x00020,0x8FFFF,0x00800,0xB7FB7,0x8CF5F,0x00001 +#define T_TITLE 0x40000,0x00000,0x00000,0x50000,0x50000,0x0031F,0x0000C +#define T_TR 0x00020,0x00400,0x8FFFF,0x00820,0xB7FB7,0x8C75F,0x00001 +#define T_TT 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00010 +#define T_U 0x00001,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00014 +#define T_UL 0x00800,0x0C480,0x8FFFF,0x36680,0xB7FFF,0x8075F,0x00000 +#define T_VAR 0x00002,0x8B04F,0x8FFFF,0xA778F,0xF7FBF,0x00001,0x00000 +#define T_WBR 0x00001,0x00000,0x00000,0x3778F,0x77FBF,0x8101F,0x00001 +#define T_XMP 0x00800,0x00000,0x00000,0x367E0,0x36FFF,0x0875F,0x00001 +#define T_OBJECT_PCDATA 0x02000,0x8FBCF,0x8FFFF,0xB679F,0xB6FBF,0x83D5F,0x00008 +#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000 +#ifdef USE_PRETTYSRC +# define N HTMLA_NORMAL +# define i HTMLA_ANAME +# define h HTMLA_HREF +# define c HTMLA_CLASS +# define x HTMLA_AUXCLASS +# define T(t) , t +#else +# define T(t) /*nothing */ 
+#endif +/* *INDENT-OFF* */ + +#define ATTR_TYPE(name) #name, name##_attr_list + +/* generic attributes, used in different tags */ +static const attr core_attr_list[] = { + { "CLASS" T(c) }, + { "ID" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType core_attr_type[] = { + { ATTR_TYPE(core) }, + { 0, 0 }, +}; + +static const attr i18n_attr_list[] = { + { "DIR" T(N) }, + { "LANG" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType i18n_attr_type[] = { + { ATTR_TYPE(i18n) }, + { 0, 0 }, +}; + +static const attr events_attr_list[] = { + { "ONCLICK" T(N) }, + { "ONDBLCLICK" T(N) }, + { "ONKEYDOWN" T(N) }, + { "ONKEYPRESS" T(N) }, + { "ONKEYUP" T(N) }, + { "ONMOUSEDOWN" T(N) }, + { "ONMOUSEMOVE" T(N) }, + { "ONMOUSEOUT" T(N) }, + { "ONMOUSEOVER" T(N) }, + { "ONMOUSEUP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType events_attr_type[] = { + { ATTR_TYPE(events) }, + { 0, 0 }, +}; + +static const attr align_attr_list[] = { + { "ALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType align_attr_type[] = { + { ATTR_TYPE(align) }, + { 0, 0 }, +}; + +static const attr cellalign_attr_list[] = { + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "VALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType cellalign_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { 0, 0 }, +}; + +static const attr bgcolor_attr_list[] = { + { "BGCOLOR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType bgcolor_attr_type[] = { + { ATTR_TYPE(bgcolor) }, + { 0, 0 }, +}; + + +/* tables defining attributes per-tag in terms of generic attributes (editable) */ +static const attr A_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CHARSET" T(N) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ISMAP" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" 
T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "SHAPE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TYPE" T(N) }, + { "URN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType A_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(A) }, + { 0, 0 }, +}; + +static const attr ADDRESS_attr_list[] = { + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType ADDRESS_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(ADDRESS) }, + { 0, 0 }, +}; + +static const attr APPLET_attr_list[] = { + { "ALT" T(N) }, + { "CLEAR" T(N) }, + { "CODE" T(N) }, + { "CODEBASE" T(h) }, + { "DOWNLOAD" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "NAME" T(i) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType APPLET_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(APPLET) }, + { 0, 0 }, +}; + +static const attr AREA_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "HREF" T(h) }, + { "NOHREF" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "SHAPE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType AREA_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(AREA) }, + { 0, 0 }, +}; + +static const attr BASE_attr_list[] = { + { "HREF" T(h) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BASE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(BASE) }, + { 0, 0 }, +}; + +static const attr BGSOUND_attr_list[] = { + { "CLEAR" T(N) }, + { "LOOP" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BGSOUND_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { 
ATTR_TYPE(BGSOUND) }, + { 0, 0 }, +}; + +static const attr BODY_attr_list[] = { + { "ALINK" T(N) }, + { "BACKGROUND" T(h) }, + { "CLEAR" T(N) }, + { "LINK" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "TEXT" T(N) }, + { "VLINK" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BODY_attr_type[] = { + { ATTR_TYPE(bgcolor) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BODY) }, + { 0, 0 }, +}; + +static const attr BODYTEXT_attr_list[] = { + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BODYTEXT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BODYTEXT) }, + { 0, 0 }, +}; + +static const attr BQ_attr_list[] = { + { "CITE" T(h) }, + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BQ_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BQ) }, + { 0, 0 }, +}; + +static const attr BUTTON_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "NAME" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "READONLY" T(N) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType BUTTON_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(BUTTON) }, + { 0, 0 }, +}; + +static const attr CAPTION_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType CAPTION_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(CAPTION) }, + { 0, 0 }, +}; + +static const attr COL_attr_list[] = { + { "CLEAR" T(N) }, + { "SPAN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + 
+static const AttrType COL_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(COL) }, + { 0, 0 }, +}; + +static const attr DEL_attr_list[] = { + { "CITE" T(N) }, + { "DATETIME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType DEL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DEL) }, + { 0, 0 }, +}; + +static const attr DIV_attr_list[] = { + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType DIV_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DIV) }, + { 0, 0 }, +}; + +static const attr DL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType DL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(DL) }, + { 0, 0 }, +}; + +static const attr EMBED_attr_list[] = { + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "ISMAP" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOFLOW" T(N) }, + { "PARAMS" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "USEMAP" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType EMBED_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(EMBED) }, + { 0, 0 }, +}; + +static const attr FIG_attr_list[] = { + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "MD" T(N) }, + { "NOFLOW" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FIG_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FIG) }, + { 0, 0 }, +}; + +static const attr FONT_attr_list[] = { + { "CLEAR" T(N) }, + { "COLOR" T(N) }, + { "END" 
T(N) }, + { "FACE" T(N) }, + { "SIZE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FONT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FONT) }, + { 0, 0 }, +}; + +static const attr FORM_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACTION" T(h) }, + { "CLEAR" T(N) }, + { "ENCTYPE" T(N) }, + { "METHOD" T(N) }, + { "ONRESET" T(N) }, + { "ONSUBMIT" T(N) }, + { "SCRIPT" T(N) }, + { "SUBJECT" T(N) }, + { "TARGET" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FORM_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(FORM) }, + { 0, 0 }, +}; + +static const attr FRAME_attr_list[] = { + { "FRAMEBORDER" T(N) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "NORESIZE" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FRAME_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(FRAME) }, + { 0, 0 }, +}; + +static const attr FRAMESET_attr_list[] = { + { "COLS" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "ROWS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType FRAMESET_attr_type[] = { + { ATTR_TYPE(FRAMESET) }, + { 0, 0 }, +}; + +static const attr GEN_attr_list[] = { + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType GEN_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(GEN) }, + { 0, 0 }, +}; + +static const attr H_attr_list[] = { + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "NOWRAP" T(N) }, + { "SEQNUM" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType H_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(H) }, + { 0, 0 }, +}; + +static const attr HR_attr_list[] = { + { "CLEAR" 
T(N) }, + { "MD" T(N) }, + { "NOSHADE" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType HR_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(HR) }, + { 0, 0 }, +}; + +static const attr IFRAME_attr_list[] = { + { "FRAMEBORDER" T(N) }, + { "HEIGHT" T(N) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType IFRAME_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(IFRAME) }, + { 0, 0 }, +}; + +static const attr IMG_attr_list[] = { + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLEAR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ISMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LONGDESC" T(h) }, + { "MD" T(N) }, + { "NAME" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType IMG_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(IMG) }, + { 0, 0 }, +}; + +static const attr INPUT_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CHECKED" T(N) }, + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ISMAP" T(N) }, + { "MAX" T(N) }, + { "MAXLENGTH" T(N) }, + { "MD" T(N) }, + { "MIN" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(N) }, + { "VALUE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType INPUT_attr_type[] = { + { 
ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(INPUT) }, + { 0, 0 }, +}; + +static const attr ISINDEX_attr_list[] = { + { "ACTION" T(h) }, + { "HREF" T(h) }, + { "PROMPT" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType ISINDEX_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(ISINDEX) }, + { 0, 0 }, +}; + +static const attr KEYGEN_attr_list[] = { + { "CHALLENGE" T(N) }, + { "NAME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType KEYGEN_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(KEYGEN) }, + { 0, 0 }, +}; + +static const attr LABEL_attr_list[] = { + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "FOR" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LABEL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LABEL) }, + { 0, 0 }, +}; + +static const attr LI_attr_list[] = { + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LI_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LI) }, + { 0, 0 }, +}; + +static const attr LINK_attr_list[] = { + { "CHARSET" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "MEDIA" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "TARGET" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType LINK_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(LINK) }, + { 0, 0 }, +}; + +static const attr MAP_attr_list[] = { + { "CLEAR" T(N) }, + { "NAME" T(i) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType MAP_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { 
ATTR_TYPE(MAP) }, + { 0, 0 }, +}; + +static const attr MATH_attr_list[] = { + { "BOX" T(N) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType MATH_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(MATH) }, + { 0, 0 }, +}; + +static const attr META_attr_list[] = { + { "CHARSET" T(N) }, + { "CONTENT" T(N) }, + { "HTTP-EQUIV" T(N) }, + { "NAME" T(N) }, + { "SCHEME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType META_attr_type[] = { + { ATTR_TYPE(META) }, + { 0, 0 }, +}; + +static const attr NEXTID_attr_list[] = { + { "N" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType NEXTID_attr_type[] = { + { ATTR_TYPE(NEXTID) }, + { 0, 0 }, +}; + +static const attr NOTE_attr_list[] = { + { "CLEAR" T(N) }, + { "MD" T(N) }, + { "ROLE" T(x) }, + { "SRC" T(h) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType NOTE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(NOTE) }, + { 0, 0 }, +}; + +static const attr OBJECT_attr_list[] = { + { "ARCHIVE" T(N) }, + { "BORDER" T(N) }, + { "CLASSID" T(h) }, + { "CODEBASE" T(h) }, + { "CODETYPE" T(N) }, + { "DATA" T(h) }, + { "DECLARE" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ISMAP" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "SHAPES" T(N) }, + { "STANDBY" T(N) }, + { "TABINDEX" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OBJECT_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(OBJECT) }, + { 0, 0 }, +}; + +static const attr OL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "CONTINUE" T(N) }, + { "SEQNUM" T(N) }, + { "START" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { 
ATTR_TYPE(OL) }, + { 0, 0 }, +}; + +static const attr OPTION_attr_list[] = { + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "LABEL" T(N) }, + { "SELECTED" T(N) }, + { "SHAPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OPTION_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(OPTION) }, + { 0, 0 }, +}; + +static const attr OVERLAY_attr_list[] = { + { "HEIGHT" T(N) }, + { "IMAGEMAP" T(N) }, + { "MD" T(N) }, + { "SRC" T(h) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { "X" T(N) }, + { "Y" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType OVERLAY_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(OVERLAY) }, + { 0, 0 }, +}; + +static const attr P_attr_list[] = { + { "CLEAR" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType P_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(P) }, + { 0, 0 }, +}; + +static const attr PARAM_attr_list[] = { + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCEPT-ENCODING" T(N) }, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUEREF" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType PARAM_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(PARAM) }, + { 0, 0 }, +}; + +static const attr Q_attr_list[] = { + { "CITE" T(h) }, + { "CLEAR" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType Q_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(Q) }, + { 0, 0 }, +}; + +static const attr SCRIPT_attr_list[] = { + { "CHARSET" T(N) }, + { "CLEAR" T(N) }, + { "DEFER" T(N) }, + { "EVENT" T(N) }, + { "FOR" T(N) }, + { "LANGUAGE" T(N) }, + { "NAME" T(N) }, + { "SCRIPTENGINE" T(N) }, + { "SRC" T(h) }, + { "TYPE" 
T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType SCRIPT_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(SCRIPT) }, + { 0, 0 }, +}; + +static const attr SELECT_attr_list[] = { + { "CLEAR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "MD" T(N) }, + { "MULTIPLE" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "SIZE" T(N) }, + { "TABINDEX" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType SELECT_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(SELECT) }, + { 0, 0 }, +}; + +static const attr STYLE_attr_list[] = { + { "MEDIA" T(N) }, + { "NOTATION" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType STYLE_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(STYLE) }, + { 0, 0 }, +}; + +static const attr TAB_attr_list[] = { + { "CLEAR" T(N) }, + { "DP" T(N) }, + { "INDENT" T(N) }, + { "TO" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TAB_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TAB) }, + { 0, 0 }, +}; + +static const attr TABLE_attr_list[] = { + { "BACKGROUND" T(h) }, + { "BORDER" T(N) }, + { "CELLPADDING" T(N) }, + { "CELLSPACING" T(N) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "COLSPEC" T(N) }, + { "DP" T(N) }, + { "FRAME" T(N) }, + { "NOFLOW" T(N) }, + { "NOWRAP" T(N) }, + { "RULES" T(N) }, + { "SUMMARY" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TABLE_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TABLE) }, + { 0, 0 }, +}; + +static const attr TD_attr_list[] = { + { "ABBR" T(N) }, + { "AXES" T(N) }, + { "AXIS" T(N) }, + { 
"BACKGROUND" T(h) }, + { "CLEAR" T(N) }, + { "COLSPAN" T(N) }, + { "DP" T(N) }, + { "HEADERS" T(N) }, + { "HEIGHT" T(N) }, + { "NOWRAP" T(N) }, + { "ROWSPAN" T(N) }, + { "SCOPE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TD_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TD) }, + { 0, 0 }, +}; + +static const attr TEXTAREA_attr_list[] = { + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "ROWS" T(N) }, + { "TABINDEX" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TEXTAREA_attr_type[] = { + { ATTR_TYPE(align) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TEXTAREA) }, + { 0, 0 }, +}; + +static const attr TR_attr_list[] = { + { "CLEAR" T(N) }, + { "DP" T(N) }, + { "NOWRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType TR_attr_type[] = { + { ATTR_TYPE(cellalign) }, + { ATTR_TYPE(core) }, + { ATTR_TYPE(events) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(TR) }, + { 0, 0 }, +}; + +static const attr UL_attr_list[] = { + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DINGBAT" T(N) }, + { "MD" T(N) }, + { "PLAIN" T(N) }, + { "SRC" T(h) }, + { "TYPE" T(N) }, + { "WRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const AttrType UL_attr_type[] = { + { ATTR_TYPE(core) }, + { ATTR_TYPE(i18n) }, + { ATTR_TYPE(UL) }, + { 0, 0 }, +}; + + +/* attribute lists for the runtime (generated by dtd_util) */ +static const attr A_attr[] = { /* A attributes */ + { "ACCESSKEY" T(N) }, + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, 
+ { "LANG" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "URN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr ADDRESS_attr[] = { /* ADDRESS attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr APPLET_attr[] = { /* APPLET attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "CODE" T(N) }, + { "CODEBASE" T(h) }, + { "DIR" T(N) }, + { "DOWNLOAD" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr AREA_attr[] = { /* AREA attributes */ + { "ACCESSKEY" T(N) }, + { "ALT" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COORDS" T(N) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOHREF" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BASE_attr[] = { /* BASE attributes */ + { "CLASS" T(c) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "STYLE" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BGSOUND_attr[] = { /* BGSOUND attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LOOP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const 
attr BODY_attr[] = { /* BODY attributes */ + { "ALINK" T(N) }, + { "BACKGROUND" T(h) }, + { "BGCOLOR" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LINK" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "STYLE" T(N) }, + { "TEXT" T(N) }, + { "TITLE" T(N) }, + { "VLINK" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BODYTEXT_attr[] = { /* BODYTEXT attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BQ_attr[] = { /* BLOCKQUOTE attributes */ + { "CITE" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr BUTTON_attr[] = { /* BUTTON attributes */ + { "ACCESSKEY" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "READONLY" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr CAPTION_attr[] = { /* CAPTION attributes */ + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr COL_attr[] = { /* COL attributes */ + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "SPAN" T(N) }, + { "STYLE" T(N) }, 
+ { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr DEL_attr[] = { /* DEL attributes */ + { "CITE" T(N) }, + { "CLASS" T(c) }, + { "DATETIME" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr DIV_attr[] = { /* CENTER attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr DL_attr[] = { /* DL attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr EMBED_attr[] = { /* EMBED attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NAME" T(i) }, + { "NOFLOW" T(N) }, + { "PARAMS" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "USEMAP" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FIG_attr[] = { /* FIG attributes */ + { "ALIGN" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOFLOW" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FONT_attr[] = { /* BASEFONT attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLOR" T(N) }, + { "DIR" T(N) }, + { "END" T(N) }, + { "FACE" T(N) }, + { "ID" T(i) }, + { 
"LANG" T(N) }, + { "SIZE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FORM_attr[] = { /* FORM attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACTION" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ENCTYPE" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "METHOD" T(N) }, + { "ONRESET" T(N) }, + { "ONSUBMIT" T(N) }, + { "SCRIPT" T(N) }, + { "STYLE" T(N) }, + { "SUBJECT" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FRAME_attr[] = { /* FRAME attributes */ + { "CLASS" T(c) }, + { "FRAMEBORDER" T(N) }, + { "ID" T(i) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "NORESIZE" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr FRAMESET_attr[] = { /* FRAMESET attributes */ + { "COLS" T(N) }, + { "ONLOAD" T(N) }, + { "ONUNLOAD" T(N) }, + { "ROWS" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr GEN_attr[] = { /* ABBR attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr H_attr[] = { /* H1 attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOWRAP" T(N) }, + { "SEQNUM" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr HR_attr[] = { /* HR attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "NOSHADE" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 
"WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr IFRAME_attr[] = { /* IFRAME attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "FRAMEBORDER" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LONGDESC" T(h) }, + { "MARGINHEIGHT" T(N) }, + { "MARGINWIDTH" T(N) }, + { "NAME" T(N) }, + { "SCROLLING" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr IMG_attr[] = { /* IMG attributes */ + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "ISOBJECT" T(N) }, + { "LANG" T(N) }, + { "LONGDESC" T(h) }, + { "MD" T(N) }, + { "NAME" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr INPUT_attr[] = { /* INPUT attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "ALT" T(N) }, + { "CHECKED" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "MAX" T(N) }, + { "MAXLENGTH" T(N) }, + { "MD" T(N) }, + { "MIN" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "SIZE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(N) }, + { "VALUE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr ISINDEX_attr[] = { /* ISINDEX attributes */ + { "ACTION" T(h) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "PROMPT" 
T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr KEYGEN_attr[] = { /* KEYGEN attributes */ + { "CHALLENGE" T(N) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LABEL_attr[] = { /* LABEL attributes */ + { "ACCESSKEY" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "FOR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "ONBLUR" T(N) }, + { "ONFOCUS" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LI_attr[] = { /* LI attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "SKIP" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr LINK_attr[] = { /* LINK attributes */ + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "HREF" T(h) }, + { "HREFLANG" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MEDIA" T(N) }, + { "REL" T(N) }, + { "REV" T(N) }, + { "STYLE" T(N) }, + { "TARGET" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr MAP_attr[] = { /* MAP attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(i) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr MATH_attr[] = { /* MATH attributes */ + { "BOX" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr META_attr[] = { /* META attributes */ + { "CHARSET" T(N) }, + { "CONTENT" T(N) }, + { "HTTP-EQUIV" T(N) }, 
+ { "NAME" T(N) }, + { "SCHEME" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr NEXTID_attr[] = { /* NEXTID attributes */ + { "N" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr NOTE_attr[] = { /* NOTE attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "ROLE" T(x) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OBJECT_attr[] = { /* OBJECT attributes */ + { "ALIGN" T(N) }, + { "ARCHIVE" T(N) }, + { "BORDER" T(N) }, + { "CLASS" T(c) }, + { "CLASSID" T(h) }, + { "CODEBASE" T(h) }, + { "CODETYPE" T(N) }, + { "DATA" T(h) }, + { "DECLARE" T(N) }, + { "DIR" T(N) }, + { "HEIGHT" T(N) }, + { "HSPACE" T(N) }, + { "ID" T(i) }, + { "ISMAP" T(N) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "SHAPES" T(N) }, + { "STANDBY" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "USEMAP" T(h) }, + { "VSPACE" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OL_attr[] = { /* OL attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "CONTINUE" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "SEQNUM" T(N) }, + { "START" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OPTION_attr[] = { /* OPTION attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "ID" T(i) }, + { "LABEL" T(N) }, + { "LANG" T(N) }, + { "SELECTED" T(N) }, + { "SHAPE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALUE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr OVERLAY_attr[] = { /* OVERLAY attributes */ + { "CLASS" T(c) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "IMAGEMAP" T(N) }, + { "MD" T(N) }, + { "SRC" T(h) }, + { 
"STYLE" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { "X" T(N) }, + { "Y" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr P_attr[] = { /* P attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr PARAM_attr[] = { /* PARAM attributes */ + { "ACCEPT" T(N) }, + { "ACCEPT-CHARSET" T(N) }, + { "ACCEPT-ENCODING" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DATA" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "OBJECT" T(N) }, + { "REF" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "VALUE" T(N) }, + { "VALUEREF" T(N) }, + { "VALUETYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr Q_attr[] = { /* Q attributes */ + { "CITE" T(h) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr SCRIPT_attr[] = { /* SCRIPT attributes */ + { "CHARSET" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DEFER" T(N) }, + { "DIR" T(N) }, + { "EVENT" T(N) }, + { "FOR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "LANGUAGE" T(N) }, + { "NAME" T(N) }, + { "SCRIPTENGINE" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr SELECT_attr[] = { /* SELECT attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "MULTIPLE" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "SIZE" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) 
}, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr STYLE_attr[] = { /* STYLE attributes */ + { "CLASS" T(c) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MEDIA" T(N) }, + { "NOTATION" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TAB_attr[] = { /* TAB attributes */ + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "ID" T(i) }, + { "INDENT" T(N) }, + { "LANG" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TO" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TABLE_attr[] = { /* TABLE attributes */ + { "ALIGN" T(N) }, + { "BACKGROUND" T(h) }, + { "BORDER" T(N) }, + { "CELLPADDING" T(N) }, + { "CELLSPACING" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "COLSPEC" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "FRAME" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOFLOW" T(N) }, + { "NOWRAP" T(N) }, + { "RULES" T(N) }, + { "STYLE" T(N) }, + { "SUMMARY" T(N) }, + { "TITLE" T(N) }, + { "UNITS" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TD_attr[] = { /* TD attributes */ + { "ABBR" T(N) }, + { "ALIGN" T(N) }, + { "AXES" T(N) }, + { "AXIS" T(N) }, + { "BACKGROUND" T(h) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLSPAN" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "HEADERS" T(N) }, + { "HEIGHT" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "ROWSPAN" T(N) }, + { "SCOPE" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { "WIDTH" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TEXTAREA_attr[] = { /* TEXTAREA attributes */ + { "ACCEPT-CHARSET" T(N) }, + { "ACCESSKEY" T(N) }, + { "ALIGN" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COLS" T(N) }, + { "DIR" 
T(N) }, + { "DISABLED" T(N) }, + { "ERROR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NAME" T(N) }, + { "NOTAB" T(N) }, + { "ONBLUR" T(N) }, + { "ONCHANGE" T(N) }, + { "ONFOCUS" T(N) }, + { "ONSELECT" T(N) }, + { "READONLY" T(N) }, + { "ROWS" T(N) }, + { "STYLE" T(N) }, + { "TABINDEX" T(N) }, + { "TITLE" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr TR_attr[] = { /* TBODY attributes */ + { "ALIGN" T(N) }, + { "CHAR" T(N) }, + { "CHAROFF" T(N) }, + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "DIR" T(N) }, + { "DP" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "NOWRAP" T(N) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "VALIGN" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +static const attr UL_attr[] = { /* DIR attributes */ + { "CLASS" T(c) }, + { "CLEAR" T(N) }, + { "COMPACT" T(N) }, + { "DINGBAT" T(N) }, + { "DIR" T(N) }, + { "ID" T(i) }, + { "LANG" T(N) }, + { "MD" T(N) }, + { "PLAIN" T(N) }, + { "SRC" T(h) }, + { "STYLE" T(N) }, + { "TITLE" T(N) }, + { "TYPE" T(N) }, + { "WRAP" T(N) }, + { 0 T(N) } /* Terminate list */ +}; + +/* *INDENT-ON* */ + +/* justification-flags */ +#undef N +#undef i +#undef h +#undef c +#undef x + +#undef T + +/* tag-names */ +#undef A +#undef ABBR +#undef ACRONYM +#undef ADDRESS +#undef APPLET +#undef AREA +#undef AU +#undef AUTHOR +#undef B +#undef BANNER +#undef BASE +#undef BASEFONT +#undef BDO +#undef BGSOUND +#undef BIG +#undef BLINK +#undef BLOCKQUOTE +#undef BODY +#undef BODYTEXT +#undef BQ +#undef BR +#undef BUTTON +#undef CAPTION +#undef CENTER +#undef CITE +#undef CODE +#undef COL +#undef COLGROUP +#undef COMMENT +#undef CREDIT +#undef DD +#undef DEL +#undef DFN +#undef DIR +#undef DIV +#undef DL +#undef DLC +#undef DT +#undef EM +#undef EMBED +#undef FIELDSET +#undef FIG +#undef FN +#undef FONT +#undef FORM +#undef FRAME +#undef FRAMESET +#undef H1 +#undef H2 +#undef H3 +#undef H4 +#undef H5 +#undef H6 +#undef HEAD +#undef HR +#undef HTML +#undef HY +#undef I +#undef IFRAME +#undef IMG +#undef 
INPUT +#undef INS +#undef ISINDEX +#undef KBD +#undef KEYGEN +#undef LABEL +#undef LEGEND +#undef LH +#undef LI +#undef LINK +#undef LISTING +#undef MAP +#undef MARQUEE +#undef MATH +#undef MENU +#undef META +#undef NEXTID +#undef NOFRAMES +#undef NOTE +#undef OBJECT +#undef OL +#undef OPTION +#undef OVERLAY +#undef P +#undef PARAM +#undef PLAINTEXT +#undef PRE +#undef Q +#undef S +#undef SAMP +#undef SCRIPT +#undef SELECT +#undef SHY +#undef SMALL +#undef SPAN +#undef SPOT +#undef STRIKE +#undef STRONG +#undef STYLE +#undef SUB +#undef SUP +#undef TAB +#undef TABLE +#undef TBODY +#undef TD +#undef TEXTAREA +#undef TEXTFLOW +#undef TFOOT +#undef TH +#undef THEAD +#undef TITLE +#undef TR +#undef TT +#undef U +#undef UL +#undef VAR +#undef WBR +#undef XMP +#undef OBJECT_PCDATA + +/* these definitions are used in the tags-tables */ +#undef P +#undef P_ +#ifdef USE_COLOR_STYLE +#define P_(x) #x, (sizeof #x) -1 +#define NULL_HTTag_ NULL, 0 +#else +#define P_(x) #x +#define NULL_HTTag_ NULL +#endif + +#ifdef USE_JUSTIFY_ELTS +#define P(x) P_(x), 1 +#define P0(x) P_(x), 0 +#define NULL_HTTag NULL_HTTag_,0 +#else +#define P(x) P_(x) +#define P0(x) P_(x) +#define NULL_HTTag NULL_HTTag_ +#endif + +#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type + +#endif /* once_HTMLDTD */ +/* *INDENT-OFF* */ +static const HTTag tags_table1[HTML_ALL_ELEMENTS] = { + { P(A), ATTR_DATA(A), SGML_MIXED, T_A}, + { P(ABBR), ATTR_DATA(GEN), SGML_MIXED, T_ABBR}, + { P(ACRONYM), ATTR_DATA(GEN), SGML_MIXED, T_ACRONYM}, + { P(ADDRESS), ATTR_DATA(ADDRESS), SGML_MIXED, T_ADDRESS}, + { P(APPLET), ATTR_DATA(APPLET), SGML_MIXED, T_APPLET}, + { P(AREA), ATTR_DATA(AREA), SGML_EMPTY, T_AREA}, + { P(AU), ATTR_DATA(GEN), SGML_MIXED, T_AU}, + { P(AUTHOR), ATTR_DATA(GEN), SGML_MIXED, T_AUTHOR}, + { P(B), ATTR_DATA(GEN), SGML_MIXED, T_B}, + { P0(BANNER), ATTR_DATA(GEN), SGML_MIXED, T_BANNER}, + { P(BASE), ATTR_DATA(BASE), SGML_EMPTY, T_BASE}, + { P(BASEFONT), ATTR_DATA(FONT), 
SGML_EMPTY, T_BASEFONT}, + { P(BDO), ATTR_DATA(GEN), SGML_MIXED, T_BDO}, + { P(BGSOUND), ATTR_DATA(BGSOUND), SGML_EMPTY, T_BGSOUND}, + { P(BIG), ATTR_DATA(GEN), SGML_MIXED, T_BIG}, + { P(BLINK), ATTR_DATA(GEN), SGML_MIXED, T_BLINK}, + { P(BLOCKQUOTE), ATTR_DATA(BQ), SGML_MIXED, T_BLOCKQUOTE}, + { P(BODY), ATTR_DATA(BODY), SGML_MIXED, T_BODY}, + { P(BODYTEXT), ATTR_DATA(BODYTEXT), SGML_MIXED, T_BODYTEXT}, + { P(BQ), ATTR_DATA(BQ), SGML_MIXED, T_BQ}, + { P(BR), ATTR_DATA(GEN), SGML_EMPTY, T_BR}, + { P(BUTTON), ATTR_DATA(BUTTON), SGML_MIXED, T_BUTTON}, + { P(CAPTION), ATTR_DATA(CAPTION), SGML_MIXED, T_CAPTION}, + { P(CENTER), ATTR_DATA(DIV), SGML_MIXED, T_CENTER}, + { P(CITE), ATTR_DATA(GEN), SGML_MIXED, T_CITE}, + { P(CODE), ATTR_DATA(GEN), SGML_MIXED, T_CODE}, + { P(COL), ATTR_DATA(COL), SGML_EMPTY, T_COL}, + { P(COLGROUP), ATTR_DATA(COL), SGML_ELEMENT, T_COLGROUP}, + { P(COMMENT), ATTR_DATA(GEN), SGML_PCDATA, T_COMMENT}, + { P(CREDIT), ATTR_DATA(GEN), SGML_MIXED, T_CREDIT}, + { P(DD), ATTR_DATA(GEN), SGML_MIXED, T_DD}, + { P(DEL), ATTR_DATA(DEL), SGML_MIXED, T_DEL}, + { P(DFN), ATTR_DATA(GEN), SGML_MIXED, T_DFN}, + { P(DIR), ATTR_DATA(UL), SGML_MIXED, T_DIR}, + { P(DIV), ATTR_DATA(DIV), SGML_MIXED, T_DIV}, + { P(DL), ATTR_DATA(DL), SGML_MIXED, T_DL}, + { P(DLC), ATTR_DATA(DL), SGML_MIXED, T_DLC}, + { P(DT), ATTR_DATA(GEN), SGML_MIXED, T_DT}, + { P(EM), ATTR_DATA(GEN), SGML_MIXED, T_EM}, + { P(EMBED), ATTR_DATA(EMBED), SGML_EMPTY, T_EMBED}, + { P(FIELDSET), ATTR_DATA(GEN), SGML_MIXED, T_FIELDSET}, + { P(FIG), ATTR_DATA(FIG), SGML_MIXED, T_FIG}, + { P(FN), ATTR_DATA(GEN), SGML_MIXED, T_FN}, + { P(FONT), ATTR_DATA(FONT), SGML_MIXED, T_FONT}, + { P(FORM), ATTR_DATA(FORM), SGML_MIXED, T_FORM}, + { P(FRAME), ATTR_DATA(FRAME), SGML_EMPTY, T_FRAME}, + { P(FRAMESET), ATTR_DATA(FRAMESET), SGML_ELEMENT, T_FRAMESET}, + { P0(H1), ATTR_DATA(H), SGML_MIXED, T_H1}, + { P0(H2), ATTR_DATA(H), SGML_MIXED, T_H2}, + { P0(H3), ATTR_DATA(H), SGML_MIXED, T_H3}, + { P0(H4), ATTR_DATA(H), 
SGML_MIXED, T_H4}, + { P0(H5), ATTR_DATA(H), SGML_MIXED, T_H5}, + { P0(H6), ATTR_DATA(H), SGML_MIXED, T_H6}, + { P(HEAD), ATTR_DATA(GEN), SGML_ELEMENT, T_HEAD}, + { P(HR), ATTR_DATA(HR), SGML_EMPTY, T_HR}, + { P(HTML), ATTR_DATA(GEN), SGML_MIXED, T_HTML}, + { P(HY), ATTR_DATA(GEN), SGML_EMPTY, T_HY}, + { P(I), ATTR_DATA(GEN), SGML_MIXED, T_I}, + { P(IFRAME), ATTR_DATA(IFRAME), SGML_MIXED, T_IFRAME}, + { P(IMG), ATTR_DATA(IMG), SGML_EMPTY, T_IMG}, + { P(INPUT), ATTR_DATA(INPUT), SGML_EMPTY, T_INPUT}, + { P(INS), ATTR_DATA(DEL), SGML_MIXED, T_INS}, + { P(ISINDEX), ATTR_DATA(ISINDEX), SGML_EMPTY, T_ISINDEX}, + { P(KBD), ATTR_DATA(GEN), SGML_MIXED, T_KBD}, + { P(KEYGEN), ATTR_DATA(KEYGEN), SGML_EMPTY, T_KEYGEN}, + { P(LABEL), ATTR_DATA(LABEL), SGML_MIXED, T_LABEL}, + { P(LEGEND), ATTR_DATA(CAPTION), SGML_MIXED, T_LEGEND}, + { P(LH), ATTR_DATA(GEN), SGML_MIXED, T_LH}, + { P(LI), ATTR_DATA(LI), SGML_MIXED, T_LI}, + { P(LINK), ATTR_DATA(LINK), SGML_EMPTY, T_LINK}, + { P(LISTING), ATTR_DATA(GEN), SGML_LITTERAL,T_LISTING}, + { P(MAP), ATTR_DATA(MAP), SGML_ELEMENT, T_MAP}, + { P(MARQUEE), ATTR_DATA(GEN), SGML_MIXED, T_MARQUEE}, + { P(MATH), ATTR_DATA(MATH), SGML_PCDATA, T_MATH}, + { P(MENU), ATTR_DATA(UL), SGML_MIXED, T_MENU}, + { P(META), ATTR_DATA(META), SGML_EMPTY, T_META}, + { P(NEXTID), ATTR_DATA(NEXTID), SGML_EMPTY, T_NEXTID}, + { P(NOFRAMES), ATTR_DATA(GEN), SGML_MIXED, T_NOFRAMES}, + { P(NOTE), ATTR_DATA(NOTE), SGML_MIXED, T_NOTE}, + { P(OBJECT), ATTR_DATA(OBJECT), SGML_LITTERAL,T_OBJECT}, + { P(OL), ATTR_DATA(OL), SGML_MIXED, T_OL}, + { P(OPTION), ATTR_DATA(OPTION), SGML_PCDATA, T_OPTION}, + { P(OVERLAY), ATTR_DATA(OVERLAY), SGML_PCDATA, T_OVERLAY}, + { P(P), ATTR_DATA(P), SGML_MIXED, T_P}, + { P(PARAM), ATTR_DATA(PARAM), SGML_EMPTY, T_PARAM}, + { P(PLAINTEXT), ATTR_DATA(GEN), SGML_LITTERAL,T_PLAINTEXT}, + { P0(PRE), ATTR_DATA(GEN), SGML_MIXED, T_PRE}, + { P(Q), ATTR_DATA(Q), SGML_MIXED, T_Q}, + { P(S), ATTR_DATA(GEN), SGML_MIXED, T_S}, + { P(SAMP), ATTR_DATA(GEN), 
SGML_MIXED, T_SAMP}, + { P(SCRIPT), ATTR_DATA(SCRIPT), SGML_SCRIPT, T_SCRIPT}, + { P(SELECT), ATTR_DATA(SELECT), SGML_ELEMENT, T_SELECT}, + { P(SHY), ATTR_DATA(GEN), SGML_EMPTY, T_SHY}, + { P(SMALL), ATTR_DATA(GEN), SGML_MIXED, T_SMALL}, + { P(SPAN), ATTR_DATA(GEN), SGML_MIXED, T_SPAN}, + { P(SPOT), ATTR_DATA(GEN), SGML_EMPTY, T_SPOT}, + { P(STRIKE), ATTR_DATA(GEN), SGML_MIXED, T_STRIKE}, + { P(STRONG), ATTR_DATA(GEN), SGML_MIXED, T_STRONG}, + { P(STYLE), ATTR_DATA(STYLE), SGML_CDATA, T_STYLE}, + { P(SUB), ATTR_DATA(GEN), SGML_MIXED, T_SUB}, + { P(SUP), ATTR_DATA(GEN), SGML_MIXED, T_SUP}, + { P(TAB), ATTR_DATA(TAB), SGML_EMPTY, T_TAB}, + { P(TABLE), ATTR_DATA(TABLE), SGML_ELEMENT, T_TABLE}, + { P(TBODY), ATTR_DATA(TR), SGML_ELEMENT, T_TBODY}, + { P(TD), ATTR_DATA(TD), SGML_MIXED, T_TD}, + { P(TEXTAREA), ATTR_DATA(TEXTAREA), SGML_PCDATA, T_TEXTAREA}, + { P(TEXTFLOW), ATTR_DATA(BODYTEXT), SGML_MIXED, T_TEXTFLOW}, + { P(TFOOT), ATTR_DATA(TR), SGML_ELEMENT, T_TFOOT}, + { P(TH), ATTR_DATA(TD), SGML_MIXED, T_TH}, + { P(THEAD), ATTR_DATA(TR), SGML_ELEMENT, T_THEAD}, + { P(TITLE), ATTR_DATA(GEN), SGML_PCDATA, T_TITLE}, + { P(TR), ATTR_DATA(TR), SGML_MIXED, T_TR}, + { P(TT), ATTR_DATA(GEN), SGML_MIXED, T_TT}, + { P(U), ATTR_DATA(GEN), SGML_MIXED, T_U}, + { P(UL), ATTR_DATA(UL), SGML_MIXED, T_UL}, + { P(VAR), ATTR_DATA(GEN), SGML_MIXED, T_VAR}, + { P(WBR), ATTR_DATA(GEN), SGML_EMPTY, T_WBR}, + { P0(XMP), ATTR_DATA(GEN), SGML_LITTERAL,T_XMP}, +/* additional (alternative variants), not counted in HTML_ELEMENTS: */ +/* This one will be used as a temporary substitute within the parser when + it has been signalled to parse OBJECT content as MIXED. 
- kw */ + { P(OBJECT), ATTR_DATA(OBJECT), SGML_MIXED, T_OBJECT_PCDATA}, +}; +/* *INDENT-ON* */ + +#endif /* src_HTMLDTD_H1 */ diff --git a/WWW/Library/Implementation/src1_HTMLDTD.txt b/WWW/Library/Implementation/src1_HTMLDTD.txt new file mode 100644 index 00000000..6e7ac733 --- /dev/null +++ b/WWW/Library/Implementation/src1_HTMLDTD.txt @@ -0,0 +1,3664 @@ +59 attr_types + 0:align + 1 attributes: + 0:0:ALIGN + 1:bgcolor + 1 attributes: + 0:0:BGCOLOR + 2:cellalign + 4 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:0:VALIGN + 3:core + 4 attributes: + 0:4:CLASS + 1:1:ID + 2:0:STYLE + 3:0:TITLE + 4:events + 10 attributes: + 0:0:ONCLICK + 1:0:ONDBLCLICK + 2:0:ONKEYDOWN + 3:0:ONKEYPRESS + 4:0:ONKEYUP + 5:0:ONMOUSEDOWN + 6:0:ONMOUSEMOVE + 7:0:ONMOUSEOUT + 8:0:ONMOUSEOVER + 9:0:ONMOUSEUP + 5:i18n + 2 attributes: + 0:0:DIR + 1:0:LANG + 6:A + 19 attributes: + 0:0:ACCESSKEY + 1:0:CHARSET + 2:0:CLEAR + 3:0:COORDS + 4:2:HREF + 5:0:HREFLANG + 6:0:ISMAP + 7:0:MD + 8:1:NAME + 9:0:NOTAB + 10:0:ONBLUR + 11:0:ONFOCUS + 12:0:REL + 13:0:REV + 14:0:SHAPE + 15:0:TABINDEX + 16:0:TARGET + 17:0:TYPE + 18:0:URN + 7:ADDRESS + 2 attributes: + 0:0:CLEAR + 1:0:NOWRAP + 8:APPLET + 10 attributes: + 0:0:ALT + 1:0:CLEAR + 2:0:CODE + 3:2:CODEBASE + 4:0:DOWNLOAD + 5:0:HEIGHT + 6:0:HSPACE + 7:1:NAME + 8:0:VSPACE + 9:0:WIDTH + 9:AREA + 12 attributes: + 0:0:ACCESSKEY + 1:0:ALT + 2:0:CLEAR + 3:0:COORDS + 4:2:HREF + 5:0:NOHREF + 6:0:NOTAB + 7:0:ONBLUR + 8:0:ONFOCUS + 9:0:SHAPE + 10:0:TABINDEX + 11:0:TARGET + 10:BASE + 2 attributes: + 0:2:HREF + 1:0:TARGET + 11:BGSOUND + 3 attributes: + 0:0:CLEAR + 1:0:LOOP + 2:2:SRC + 12:BODY + 8 attributes: + 0:0:ALINK + 1:2:BACKGROUND + 2:0:CLEAR + 3:0:LINK + 4:0:ONLOAD + 5:0:ONUNLOAD + 6:0:TEXT + 7:0:VLINK + 13:BODYTEXT + 8 attributes: + 0:0:CLEAR + 1:0:DATA + 2:0:NAME + 3:0:OBJECT + 4:0:REF + 5:0:TYPE + 6:0:VALUE + 7:0:VALUETYPE + 14:BQ + 3 attributes: + 0:2:CITE + 1:0:CLEAR + 2:0:NOWRAP + 15:BUTTON + 10 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 2:0:DISABLED + 
3:0:NAME + 4:0:READONLY + 5:0:ONBLUR + 6:0:ONFOCUS + 7:0:TABINDEX + 8:0:TYPE + 9:0:VALUE + 16:CAPTION + 2 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 17:COL + 3 attributes: + 0:0:CLEAR + 1:0:SPAN + 2:0:WIDTH + 18:DEL + 2 attributes: + 0:0:CITE + 1:0:DATETIME + 19:DIV + 1 attributes: + 0:0:CLEAR + 20:DL + 2 attributes: + 0:0:CLEAR + 1:0:COMPACT + 21:EMBED + 14 attributes: + 0:0:ALT + 1:0:BORDER + 2:0:CLEAR + 3:0:HEIGHT + 4:0:IMAGEMAP + 5:0:ISMAP + 6:0:MD + 7:1:NAME + 8:0:NOFLOW + 9:0:PARAMS + 10:2:SRC + 11:0:UNITS + 12:0:USEMAP + 13:0:WIDTH + 22:FIG + 10 attributes: + 0:0:BORDER + 1:0:CLEAR + 2:0:HEIGHT + 3:0:IMAGEMAP + 4:0:ISOBJECT + 5:0:MD + 6:0:NOFLOW + 7:2:SRC + 8:0:UNITS + 9:0:WIDTH + 23:FONT + 5 attributes: + 0:0:CLEAR + 1:0:COLOR + 2:0:END + 3:0:FACE + 4:0:SIZE + 24:FORM + 11 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:2:ACTION + 3:0:CLEAR + 4:0:ENCTYPE + 5:0:METHOD + 6:0:ONRESET + 7:0:ONSUBMIT + 8:0:SCRIPT + 9:0:SUBJECT + 10:0:TARGET + 25:FRAME + 8 attributes: + 0:0:FRAMEBORDER + 1:2:LONGDESC + 2:0:MARGINHEIGHT + 3:0:MARGINWIDTH + 4:0:NAME + 5:0:NORESIZE + 6:0:SCROLLING + 7:2:SRC + 26:FRAMESET + 4 attributes: + 0:0:COLS + 1:0:ONLOAD + 2:0:ONUNLOAD + 3:0:ROWS + 27:GEN + 1 attributes: + 0:0:CLEAR + 28:H + 7 attributes: + 0:0:CLEAR + 1:0:DINGBAT + 2:0:MD + 3:0:NOWRAP + 4:0:SEQNUM + 5:0:SKIP + 6:2:SRC + 29:HR + 6 attributes: + 0:0:CLEAR + 1:0:MD + 2:0:NOSHADE + 3:0:SIZE + 4:2:SRC + 5:0:WIDTH + 30:IFRAME + 9 attributes: + 0:0:FRAMEBORDER + 1:0:HEIGHT + 2:2:LONGDESC + 3:0:MARGINHEIGHT + 4:0:MARGINWIDTH + 5:0:NAME + 6:0:SCROLLING + 7:2:SRC + 8:0:WIDTH + 31:IMG + 15 attributes: + 0:0:ALT + 1:0:BORDER + 2:0:CLEAR + 3:0:HEIGHT + 4:0:HSPACE + 5:0:ISMAP + 6:0:ISOBJECT + 7:2:LONGDESC + 8:0:MD + 9:0:NAME + 10:2:SRC + 11:0:UNITS + 12:2:USEMAP + 13:0:VSPACE + 14:0:WIDTH + 32:INPUT + 28 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCESSKEY + 3:0:ALT + 4:0:CHECKED + 5:0:CLEAR + 6:0:DISABLED + 7:0:ERROR + 8:0:HEIGHT + 9:0:ISMAP + 10:0:MAX + 11:0:MAXLENGTH + 12:0:MD 
+ 13:0:MIN + 14:0:NAME + 15:0:NOTAB + 16:0:ONBLUR + 17:0:ONCHANGE + 18:0:ONFOCUS + 19:0:ONSELECT + 20:0:READONLY + 21:0:SIZE + 22:2:SRC + 23:0:TABINDEX + 24:0:TYPE + 25:0:USEMAP + 26:0:VALUE + 27:0:WIDTH + 33:ISINDEX + 3 attributes: + 0:2:ACTION + 1:2:HREF + 2:0:PROMPT + 34:KEYGEN + 2 attributes: + 0:0:CHALLENGE + 1:0:NAME + 35:LABEL + 5 attributes: + 0:0:ACCESSKEY + 1:0:CLEAR + 2:0:FOR + 3:0:ONBLUR + 4:0:ONFOCUS + 36:LI + 7 attributes: + 0:0:CLEAR + 1:0:DINGBAT + 2:0:MD + 3:0:SKIP + 4:2:SRC + 5:0:TYPE + 6:0:VALUE + 37:LINK + 8 attributes: + 0:0:CHARSET + 1:2:HREF + 2:0:HREFLANG + 3:0:MEDIA + 4:0:REL + 5:0:REV + 6:0:TARGET + 7:0:TYPE + 38:MAP + 2 attributes: + 0:0:CLEAR + 1:1:NAME + 39:MATH + 2 attributes: + 0:0:BOX + 1:0:CLEAR + 40:META + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME + 41:NEXTID + 1 attributes: + 0:0:N + 42:NOTE + 4 attributes: + 0:0:CLEAR + 1:0:MD + 2:8:ROLE + 3:2:SRC + 43:OBJECT + 19 attributes: + 0:0:ARCHIVE + 1:0:BORDER + 2:2:CLASSID + 3:2:CODEBASE + 4:0:CODETYPE + 5:2:DATA + 6:0:DECLARE + 7:0:HEIGHT + 8:0:HSPACE + 9:0:ISMAP + 10:0:NAME + 11:0:NOTAB + 12:0:SHAPES + 13:0:STANDBY + 14:0:TABINDEX + 15:0:TYPE + 16:2:USEMAP + 17:0:VSPACE + 18:0:WIDTH + 44:OL + 6 attributes: + 0:0:CLEAR + 1:0:COMPACT + 2:0:CONTINUE + 3:0:SEQNUM + 4:0:START + 5:0:TYPE + 45:OPTION + 7 attributes: + 0:0:CLEAR + 1:0:DISABLED + 2:0:ERROR + 3:0:LABEL + 4:0:SELECTED + 5:0:SHAPE + 6:0:VALUE + 46:OVERLAY + 8 attributes: + 0:0:HEIGHT + 1:0:IMAGEMAP + 2:0:MD + 3:2:SRC + 4:0:UNITS + 5:0:WIDTH + 6:0:X + 7:0:Y + 47:P + 2 attributes: + 0:0:CLEAR + 1:0:NOWRAP + 48:PARAM + 12 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCEPT-ENCODING + 3:0:CLEAR + 4:0:DATA + 5:0:NAME + 6:0:OBJECT + 7:0:REF + 8:0:TYPE + 9:0:VALUE + 10:0:VALUEREF + 11:0:VALUETYPE + 49:Q + 2 attributes: + 0:2:CITE + 1:0:CLEAR + 50:SCRIPT + 10 attributes: + 0:0:CHARSET + 1:0:CLEAR + 2:0:DEFER + 3:0:EVENT + 4:0:FOR + 5:0:LANGUAGE + 6:0:NAME + 7:0:SCRIPTENGINE + 8:2:SRC + 
9:0:TYPE + 51:SELECT + 15 attributes: + 0:0:CLEAR + 1:0:DISABLED + 2:0:ERROR + 3:0:HEIGHT + 4:0:MD + 5:0:MULTIPLE + 6:0:NAME + 7:0:NOTAB + 8:0:ONBLUR + 9:0:ONCHANGE + 10:0:ONFOCUS + 11:0:SIZE + 12:0:TABINDEX + 13:0:UNITS + 14:0:WIDTH + 52:STYLE + 3 attributes: + 0:0:MEDIA + 1:0:NOTATION + 2:0:TYPE + 53:TAB + 4 attributes: + 0:0:CLEAR + 1:0:DP + 2:0:INDENT + 3:0:TO + 54:TABLE + 15 attributes: + 0:2:BACKGROUND + 1:0:BORDER + 2:0:CELLPADDING + 3:0:CELLSPACING + 4:0:CLEAR + 5:0:COLS + 6:0:COLSPEC + 7:0:DP + 8:0:FRAME + 9:0:NOFLOW + 10:0:NOWRAP + 11:0:RULES + 12:0:SUMMARY + 13:0:UNITS + 14:0:WIDTH + 55:TD + 13 attributes: + 0:0:ABBR + 1:0:AXES + 2:0:AXIS + 3:2:BACKGROUND + 4:0:CLEAR + 5:0:COLSPAN + 6:0:DP + 7:0:HEADERS + 8:0:HEIGHT + 9:0:NOWRAP + 10:0:ROWSPAN + 11:0:SCOPE + 12:0:WIDTH + 56:TEXTAREA + 15 attributes: + 0:0:ACCEPT-CHARSET + 1:0:ACCESSKEY + 2:0:CLEAR + 3:0:COLS + 4:0:DISABLED + 5:0:ERROR + 6:0:NAME + 7:0:NOTAB + 8:0:ONBLUR + 9:0:ONCHANGE + 10:0:ONFOCUS + 11:0:ONSELECT + 12:0:READONLY + 13:0:ROWS + 14:0:TABINDEX + 57:TR + 3 attributes: + 0:0:CLEAR + 1:0:DP + 2:0:NOWRAP + 58:UL + 8 attributes: + 0:0:CLEAR + 1:0:COMPACT + 2:0:DINGBAT + 3:0:MD + 4:0:PLAIN + 5:2:SRC + 6:0:TYPE + 7:0:WRAP +118 tags + 0:A + justify + 25 attributes: + 0:0:ACCESSKEY + 1:0:CHARSET + 2:4:CLASS + 3:0:CLEAR + 4:0:COORDS + 5:0:DIR + 6:2:HREF + 7:0:HREFLANG + 8:1:ID + 9:0:ISMAP + 10:0:LANG + 11:0:MD + 12:1:NAME + 13:0:NOTAB + 14:0:ONBLUR + 15:0:ONFOCUS + 16:0:REL + 17:0:REV + 18:0:SHAPE + 19:0:STYLE + 20:0:TABINDEX + 21:0:TARGET + 22:0:TITLE + 23:0:TYPE + 24:0:URN + 4 attr_types + core + events + i18n + A + contents: SGML_MIXED + tagclass: Alike + contains: FONTlike EMlike MATHlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike formula Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike 
ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike SELECTlike APPLETlike HRlike same + flags: mafse nreie + 1:ABBR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 2:ACRONYM + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 3:ADDRESS + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:NOWRAP + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + ADDRESS + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike Plike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike 
FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike formula Plike DIVlike same + flags: + 4:APPLET + justify + 17 attributes: + 0:0:ALIGN + 1:0:ALT + 2:4:CLASS + 3:0:CLEAR + 4:0:CODE + 5:2:CODEBASE + 6:0:DIR + 7:0:DOWNLOAD + 8:0:HEIGHT + 9:0:HSPACE + 10:1:ID + 11:0:LANG + 12:1:NAME + 13:0:STYLE + 14:0:TITLE + 15:0:VSPACE + 16:0:WIDTH + 4 attr_types + align + core + i18n + APPLET + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike BRlike APPLETlike same + flags: + 5:AREA + justify + 18 attributes: + 0:0:ACCESSKEY + 1:0:ALT + 2:4:CLASS + 3:0:CLEAR + 4:0:COORDS + 5:0:DIR + 6:2:HREF + 7:1:ID + 8:0:LANG + 9:0:NOHREF + 10:0:NOTAB + 11:0:ONBLUR + 12:0:ONFOCUS + 13:0:SHAPE + 14:0:STYLE + 15:0:TABINDEX + 16:0:TARGET + 17:0:TITLE + 4 attr_types + core + events + i18n + AREA + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: MAPlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike ULlike + flags: endO + 6:AU + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG 
+ 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 7:AUTHOR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 8:B + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 9:BANNER + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: outer BODYlike + icontained: outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 10:BASE + justify + 6 attributes: + 0:4:CLASS + 1:2:HREF + 2:1:ID + 3:0:STYLE + 4:0:TARGET + 5:0:TITLE + 2 attr_types + core + BASE + contents: SGML_EMPTY + tagclass: HEADstuff + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike same + flags: endO + 11:BASEFONT + justify + 11 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COLOR + 3:0:DIR + 4:0:END + 5:0:FACE + 6:1:ID + 7:0:LANG + 8:0:SIZE + 9:0:STYLE + 10:0:TITLE + 3 attr_types + core + i18n + FONT + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: BRlike APPLETlike HRlike MAPlike same + flags: endO + 12:BDO + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike 
BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike Plike DIVlike + flags: + 13:BGSOUND + justify + 9 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:LOOP + 6:2:SRC + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BGSOUND + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike Plike DIVlike BRlike APPLETlike HRlike same + flags: endO + 14:BIG + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 15:BLINK + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike 
Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 16:BLOCKQUOTE + justify + 9 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BQ + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 17:BODY + justify + 15 attributes: + 0:0:ALINK + 1:2:BACKGROUND + 2:0:BGCOLOR + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:LINK + 9:0:ONLOAD + 10:0:ONUNLOAD + 11:0:STYLE + 12:0:TEXT + 13:0:TITLE + 14:0:VLINK + 4 attr_types + bgcolor + core + i18n + BODY + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + contained: outer BODYlike + icontained: outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer HEADstuff same + flags: endO 
startO + 18:BODYTEXT + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DATA + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NAME + 7:0:OBJECT + 8:0:REF + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 13:0:VALUETYPE + 3 attr_types + core + i18n + BODYTEXT + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: DIVlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike BRlike APPLETlike HRlike MAPlike same + flags: endO startO + 19:BQ + justify + 9 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + BQ + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 20:BR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + 
icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 21:BUTTON + justify + 16 attributes: + 0:0:ACCESSKEY + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DISABLED + 5:1:ID + 6:0:LANG + 7:0:NAME + 8:0:ONBLUR + 9:0:ONFOCUS + 10:0:READONLY + 11:0:STYLE + 12:0:TABINDEX + 13:0:TITLE + 14:0:TYPE + 15:0:VALUE + 4 attr_types + core + events + i18n + BUTTON + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike formula TRlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike BRlike same + flags: + 22:CAPTION + justify + 9 attributes: + 0:0:ACCESSKEY + 1:0:ALIGN + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:STYLE + 8:0:TITLE + 5 attr_types + align + core + events + i18n + CAPTION + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike ULlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike same + flags: + 23:CENTER + justify + 8 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + align + core + i18n + DIV + contents: SGML_MIXED + 
tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: + 24:CITE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: EMlike + flags: nreie + 25:CODE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + 
canclose: EMlike + flags: + 26:COL + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:SPAN + 9:0:STYLE + 10:0:TITLE + 11:0:VALIGN + 12:0:WIDTH + 5 attr_types + cellalign + core + events + i18n + COL + contents: SGML_EMPTY + tagclass: HRlike + contains: + icontains: + contained: TRlike ULlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: endO + 27:COLGROUP + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:1:ID + 7:0:LANG + 8:0:SPAN + 9:0:STYLE + 10:0:TITLE + 11:0:VALIGN + 12:0:WIDTH + 5 attr_types + cellalign + core + events + i18n + COL + contents: SGML_ELEMENT + tagclass: TRlike + contains: HRlike + icontains: HRlike + contained: ULlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike MAPlike same + flags: endO + 28:COMMENT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_PCDATA + tagclass: MATHlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + canclose: FONTlike EMlike + flags: + 29:CREDIT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike 
APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike ULlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike Plike DIVlike same + flags: + 30:DD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 31:DEL + justify + 8 attributes: + 0:0:CITE + 1:4:CLASS + 2:0:DATETIME + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + core + events + i18n + DEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 32:DFN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID 
+ 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 33:DIR + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike HRlike same + flags: + 34:DIV + justify + 8 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + align + core + i18n + DIV + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike 
MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: mafse + 35:DL + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + DL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike + flags: + 36:DLC + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + DL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike + flags: + 37:DT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike BRlike APPLETlike MAPlike + contained: ULlike + 
icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 38:EM + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: nreie + 39:EMBED + justify + 21 attributes: + 0:0:ALIGN + 1:0:ALT + 2:0:BORDER + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:HEIGHT + 7:1:ID + 8:0:IMAGEMAP + 9:0:ISMAP + 10:0:LANG + 11:0:MD + 12:1:NAME + 13:0:NOFLOW + 14:0:PARAMS + 15:2:SRC + 16:0:STYLE + 17:0:TITLE + 18:0:UNITS + 19:0:USEMAP + 20:0:WIDTH + 4 attr_types + align + core + i18n + EMBED + contents: SGML_EMPTY + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Plike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + flags: endO + 
40:FIELDSET + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike MAPlike same + flags: + 41:FIG + justify + 17 attributes: + 0:0:ALIGN + 1:0:BORDER + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:0:HEIGHT + 6:1:ID + 7:0:IMAGEMAP + 8:0:ISOBJECT + 9:0:LANG + 10:0:MD + 11:0:NOFLOW + 12:2:SRC + 13:0:STYLE + 14:0:TITLE + 15:0:UNITS + 16:0:WIDTH + 4 attr_types + align + core + i18n + FIG + contents: SGML_MIXED + tagclass: DIVlike + contains: Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike SELECTlike Plike DIVlike MAPlike same + flags: + 42:FN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike 
SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike SELECTlike Plike BRlike same + flags: + 43:FONT + justify + 11 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COLOR + 3:0:DIR + 4:0:END + 5:0:FACE + 6:1:ID + 7:0:LANG + 8:0:SIZE + 9:0:STYLE + 10:0:TITLE + 3 attr_types + core + i18n + FONT + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 44:FORM + justify + 17 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:2:ACTION + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:ENCTYPE + 7:1:ID + 8:0:LANG + 9:0:METHOD + 10:0:ONRESET + 11:0:ONSUBMIT + 12:0:SCRIPT + 13:0:STYLE + 14:0:SUBJECT + 15:0:TARGET + 16:0:TITLE + 3 attr_types + core + i18n + FORM + contents: SGML_MIXED + tagclass: FORMlike + contains: FONTlike EMlike MATHlike Alike TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Plike DIVlike LIlike ULlike APPLETlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula 
TRlike SELECTlike FORMlike Plike LIlike ULlike MAPlike same + flags: + 45:FRAME + justify + 12 attributes: + 0:4:CLASS + 1:0:FRAMEBORDER + 2:1:ID + 3:2:LONGDESC + 4:0:MARGINHEIGHT + 5:0:MARGINWIDTH + 6:0:NAME + 7:0:NORESIZE + 8:0:SCROLLING + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 2 attr_types + core + FRAME + contents: SGML_EMPTY + tagclass: outer + contains: + icontains: + contained: outer + icontained: outer + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: endO + 46:FRAMESET + justify + 4 attributes: + 0:0:COLS + 1:0:ONLOAD + 2:0:ONUNLOAD + 3:0:ROWS + 1 attr_types + FRAMESET + contents: SGML_ELEMENT + tagclass: outer + contains: outer same + icontains: outer same + contained: outer same + icontained: BRlike APPLETlike outer same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: + 47:H1 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 48:H2 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: 
SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 49:H3 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 50:H4 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 51:H5 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT 
+ 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 52:H6 + nojustify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:NOWRAP + 9:0:SEQNUM + 10:0:SKIP + 11:2:SRC + 12:0:STYLE + 13:0:TITLE + 5 attr_types + align + core + events + i18n + H + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula Plike same + flags: + 53:HEAD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_ELEMENT + tagclass: HEADstuff + contains: BRlike APPLETlike HRlike MAPlike HEADstuff + icontains: BRlike APPLETlike HRlike HEADstuff + contained: outer + icontained: outer + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + flags: endO startO mafse + 54:HR + justify + 13 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:MD + 
7:0:NOSHADE + 8:0:SIZE + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:WIDTH + 4 attr_types + align + core + i18n + HR + contents: SGML_EMPTY + tagclass: HRlike + contains: + icontains: + contained: FORMlike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: FONTlike EMlike MATHlike formula TRlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike same + flags: endO + 55:HTML + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: outer + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + contained: + icontained: + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + flags: endO startO + 56:HY + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 57:I + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + 
contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 58:IFRAME + justify + 14 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:FRAMEBORDER + 3:0:HEIGHT + 4:1:ID + 5:2:LONGDESC + 6:0:MARGINHEIGHT + 7:0:MARGINWIDTH + 8:0:NAME + 9:0:SCROLLING + 10:2:SRC + 11:0:STYLE + 12:0:TITLE + 13:0:WIDTH + 3 attr_types + align + core + IFRAME + contents: SGML_MIXED + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike BRlike APPLETlike outer HEADstuff same + flags: + 59:IMG + justify + 22 attributes: + 0:0:ALIGN + 1:0:ALT + 2:0:BORDER + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:HEIGHT + 7:0:HSPACE + 8:1:ID + 9:0:ISMAP + 10:0:ISOBJECT + 11:0:LANG + 12:2:LONGDESC + 13:0:MD + 14:0:NAME + 15:2:SRC + 16:0:STYLE + 17:0:TITLE + 18:0:UNITS + 19:2:USEMAP + 20:0:VSPACE + 21:0:WIDTH + 5 attr_types + align + core + events + i18n + IMG + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike 
BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: same + flags: endO + 60:INPUT + justify + 35 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCESSKEY + 3:0:ALIGN + 4:0:ALT + 5:0:CHECKED + 6:4:CLASS + 7:0:CLEAR + 8:0:DIR + 9:0:DISABLED + 10:0:ERROR + 11:0:HEIGHT + 12:1:ID + 13:0:ISMAP + 14:0:LANG + 15:0:MAX + 16:0:MAXLENGTH + 17:0:MD + 18:0:MIN + 19:0:NAME + 20:0:NOTAB + 21:0:ONBLUR + 22:0:ONCHANGE + 23:0:ONFOCUS + 24:0:ONSELECT + 25:0:READONLY + 26:0:SIZE + 27:2:SRC + 28:0:STYLE + 29:0:TABINDEX + 30:0:TITLE + 31:0:TYPE + 32:0:USEMAP + 33:0:VALUE + 34:0:WIDTH + 5 attr_types + align + core + events + i18n + INPUT + contents: SGML_EMPTY + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike SELECTlike BRlike MAPlike same + flags: endO + 61:INS + justify + 8 attributes: + 0:0:CITE + 1:4:CLASS + 2:0:DATETIME + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 4 attr_types + core + events + i18n + DEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 62:ISINDEX + justify + 9 attributes: + 0:2:ACTION + 1:4:CLASS + 2:0:DIR + 3:2:HREF 
+ 4:1:ID + 5:0:LANG + 6:0:PROMPT + 7:0:STYLE + 8:0:TITLE + 3 attr_types + core + i18n + ISINDEX + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike same + flags: endO + 63:KBD + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike + flags: + 64:KEYGEN + justify + 8 attributes: + 0:0:CHALLENGE + 1:4:CLASS + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:NAME + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + KEYGEN + contents: SGML_EMPTY + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: formula TRlike SELECTlike same + flags: endO + 65:LABEL + justify + 11 attributes: + 0:0:ACCESSKEY + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:FOR + 5:1:ID + 6:0:LANG + 7:0:ONBLUR + 8:0:ONFOCUS + 9:0:STYLE + 10:0:TITLE + 4 attr_types + core + events + i18n + LABEL + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike 
APPLETlike HRlike MAPlike + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike + flags: + 66:LEGEND + justify + 9 attributes: + 0:0:ACCESSKEY + 1:0:ALIGN + 2:4:CLASS + 3:0:CLEAR + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:STYLE + 8:0:TITLE + 5 attr_types + align + core + events + i18n + CAPTION + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: DIVlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike + flags: + 67:LH + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 68:LI + justify + 13 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DINGBAT + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:MD + 7:0:SKIP + 8:2:SRC + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 4 attr_types + core + events + i18n + LI + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike 
FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: endO + 69:LINK + justify + 14 attributes: + 0:0:CHARSET + 1:4:CLASS + 2:0:DIR + 3:2:HREF + 4:0:HREFLANG + 5:1:ID + 6:0:LANG + 7:0:MEDIA + 8:0:REL + 9:0:REV + 10:0:STYLE + 11:0:TARGET + 12:0:TITLE + 13:0:TYPE + 4 attr_types + core + events + i18n + LINK + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + flags: endO + 70:LISTING + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: ULlike + contains: + icontains: + contained: DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike LIlike ULlike same + flags: + 71:MAP + justify + 8 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:1:NAME + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + MAP + contents: SGML_ELEMENT + tagclass: MAPlike + contains: MAPlike + icontains: MAPlike + contained: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula 
Plike LIlike + flags: + 72:MARQUEE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: HRlike + contains: FONTlike EMlike MATHlike Alike + icontains: FONTlike EMlike MATHlike Alike formula BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: MATHlike Alike formula BRlike APPLETlike same + flags: + 73:MATH + justify + 8 attributes: + 0:0:BOX + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + MATH + contents: SGML_PCDATA + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike MATHlike Alike formula + flags: + 74:MENU + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike 
DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: + 75:META + justify + 5 attributes: + 0:0:CHARSET + 1:0:CONTENT + 2:0:HTTP-EQUIV + 3:0:NAME + 4:0:SCHEME + 1 attr_types + META + contents: SGML_EMPTY + tagclass: MAPlike + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + flags: endO + 76:NEXTID + justify + 1 attributes: + 0:0:N + 1 attr_types + NEXTID + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: outer HEADstuff + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer + canclose: FONTlike + flags: endO + 77:NOFRAMES + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike BODYlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: BRlike APPLETlike HRlike outer + icontained: BRlike APPLETlike HRlike outer + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike + flags: + 78:NOTE + justify + 10 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:MD + 6:8:ROLE + 7:2:SRC + 8:0:STYLE + 9:0:TITLE + 3 attr_types + core + i18n + NOTE + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike ULlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: formula TRlike FORMlike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: + 79:OBJECT + justify + 26 attributes: + 0:0:ALIGN + 1:0:ARCHIVE + 2:0:BORDER + 3:4:CLASS + 4:2:CLASSID + 5:2:CODEBASE + 6:0:CODETYPE + 7:2:DATA + 8:0:DECLARE + 9:0:DIR + 10:0:HEIGHT + 11:0:HSPACE + 12:1:ID + 13:0:ISMAP + 14:0:LANG + 15:0:NAME + 16:0:NOTAB + 17:0:SHAPES + 18:0:STANDBY + 19:0:STYLE + 20:0:TABINDEX + 21:0:TITLE + 22:0:TYPE + 23:2:USEMAP + 24:0:VSPACE + 25:0:WIDTH + 5 attr_types + align + core + events + i18n + OBJECT + contents: SGML_LITTERAL + tagclass: APPLETlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike BRlike APPLETlike same + flags: frecyc + 80:OL + justify + 12 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:CONTINUE + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:SEQNUM + 8:0:START + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 3 attr_types + core + i18n + OL + contents: SGML_MIXED + tagclass: ULlike + contains: LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike BRlike 
APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike MAPlike same + flags: + 81:OPTION + justify + 13 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:0:DISABLED + 4:0:ERROR + 5:1:ID + 6:0:LABEL + 7:0:LANG + 8:0:SELECTED + 9:0:SHAPE + 10:0:STYLE + 11:0:TITLE + 12:0:VALUE + 4 attr_types + core + events + i18n + OPTION + contents: SGML_PCDATA + tagclass: MAPlike + contains: + icontains: + contained: SELECTlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike same + flags: endO + 82:OVERLAY + justify + 12 attributes: + 0:4:CLASS + 1:0:HEIGHT + 2:1:ID + 3:0:IMAGEMAP + 4:0:MD + 5:2:SRC + 6:0:STYLE + 7:0:TITLE + 8:0:UNITS + 9:0:WIDTH + 10:0:X + 11:0:Y + 2 attr_types + core + OVERLAY + contents: SGML_PCDATA + tagclass: HRlike + contains: + icontains: + contained: DIVlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike DIVlike LIlike ULlike BRlike APPLETlike same + flags: endO + 83:P + justify + 9 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NOWRAP + 7:0:STYLE + 8:0:TITLE + 4 attr_types + align + core + i18n + P + contents: SGML_MIXED + tagclass: Plike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike 
DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike formula Plike same + flags: endO + 84:PARAM + justify + 18 attributes: + 0:0:ACCEPT + 1:0:ACCEPT-CHARSET + 2:0:ACCEPT-ENCODING + 3:4:CLASS + 4:0:CLEAR + 5:0:DATA + 6:0:DIR + 7:1:ID + 8:0:LANG + 9:0:NAME + 10:0:OBJECT + 11:0:REF + 12:0:STYLE + 13:0:TITLE + 14:0:TYPE + 15:0:VALUE + 16:0:VALUEREF + 17:0:VALUETYPE + 3 attr_types + core + i18n + PARAM + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: Plike LIlike BRlike APPLETlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike + canclose: TRlike SELECTlike Plike LIlike BRlike same + flags: endO + 85:PLAINTEXT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: outer + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike HEADstuff same + contained: outer same + icontained: outer same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + flags: endO + 86:PRE + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: DIVlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike HRlike MAPlike + icontains: EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike HRlike MAPlike + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + 
icontained: formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: EMlike MATHlike Alike formula Plike DIVlike LIlike same + flags: + 87:Q + justify + 8 attributes: + 0:2:CITE + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:STYLE + 7:0:TITLE + 3 attr_types + core + i18n + Q + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: + 88:S + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 89:SAMP + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike 
DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: EMlike + flags: nreie + 90:SCRIPT + justify + 16 attributes: + 0:0:CHARSET + 1:4:CLASS + 2:0:CLEAR + 3:0:DEFER + 4:0:DIR + 5:0:EVENT + 6:0:FOR + 7:1:ID + 8:0:LANG + 9:0:LANGUAGE + 10:0:NAME + 11:0:SCRIPTENGINE + 12:2:SRC + 13:0:STYLE + 14:0:TITLE + 15:0:TYPE + 3 attr_types + core + i18n + SCRIPT + contents: SGML_SCRIPT + tagclass: APPLETlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike BRlike APPLETlike HRlike same + flags: + 91:SELECT + justify + 22 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DISABLED + 5:0:ERROR + 6:0:HEIGHT + 7:1:ID + 8:0:LANG + 9:0:MD + 10:0:MULTIPLE + 11:0:NAME + 12:0:NOTAB + 13:0:ONBLUR + 14:0:ONCHANGE + 15:0:ONFOCUS + 16:0:SIZE + 17:0:STYLE + 18:0:TABINDEX + 19:0:TITLE + 20:0:UNITS + 21:0:WIDTH + 4 attr_types + align + core + i18n + SELECT + contents: SGML_ELEMENT + tagclass: SELECTlike + contains: MAPlike + icontains: MAPlike + contained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike LIlike ULlike same + flags: strict + 92:SHY + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 
4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 93:SMALL + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 94:SPAN + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike same + flags: + 95:SPOT + justify + 7 
attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: Alike + contains: + icontains: + contained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + icontained: FONTlike EMlike MATHlike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer BODYlike + canclose: Alike + flags: endO + 96:STRIKE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 97:STRONG + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike + flags: nreie + 
98:STYLE + justify + 9 attributes: + 0:4:CLASS + 1:0:DIR + 2:1:ID + 3:0:LANG + 4:0:MEDIA + 5:0:NOTATION + 6:0:STYLE + 7:0:TITLE + 8:0:TYPE + 3 attr_types + core + i18n + STYLE + contents: SGML_CDATA + tagclass: HEADstuff + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike APPLETlike HRlike outer BODYlike HEADstuff + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula same + flags: + 99:SUB + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike EMlike MATHlike + flags: + 100:SUP + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: MATHlike + contains: FONTlike EMlike MATHlike Alike formula SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike formula FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike 
HEADstuff same + canclose: FONTlike EMlike MATHlike + flags: + 101:TAB + justify + 11 attributes: + 0:0:ALIGN + 1:4:CLASS + 2:0:CLEAR + 3:0:DIR + 4:0:DP + 5:1:ID + 6:0:INDENT + 7:0:LANG + 8:0:STYLE + 9:0:TITLE + 10:0:TO + 4 attr_types + align + core + i18n + TAB + contents: SGML_EMPTY + tagclass: BRlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer HEADstuff + canclose: FONTlike + flags: endO + 102:TABLE + justify + 22 attributes: + 0:0:ALIGN + 1:2:BACKGROUND + 2:0:BORDER + 3:0:CELLPADDING + 4:0:CELLSPACING + 5:4:CLASS + 6:0:CLEAR + 7:0:COLS + 8:0:COLSPEC + 9:0:DIR + 10:0:DP + 11:0:FRAME + 12:1:ID + 13:0:LANG + 14:0:NOFLOW + 15:0:NOWRAP + 16:0:RULES + 17:0:STYLE + 18:0:SUMMARY + 19:0:TITLE + 20:0:UNITS + 21:0:WIDTH + 5 attr_types + align + core + events + i18n + TABLE + contents: SGML_ELEMENT + tagclass: ULlike + contains: TRlike SELECTlike FORMlike Plike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike TRlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike Plike LIlike HRlike MAPlike same + flags: + 103:TBODY + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_ELEMENT + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike 
HRlike MAPlike same + contained: FORMlike ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO startO + 104:TD + justify + 23 attributes: + 0:0:ABBR + 1:0:ALIGN + 2:0:AXES + 3:0:AXIS + 4:2:BACKGROUND + 5:0:CHAR + 6:0:CHAROFF + 7:4:CLASS + 8:0:CLEAR + 9:0:COLSPAN + 10:0:DIR + 11:0:DP + 12:0:HEADERS + 13:0:HEIGHT + 14:1:ID + 15:0:LANG + 16:0:NOWRAP + 17:0:ROWSPAN + 18:0:SCOPE + 19:0:STYLE + 20:0:TITLE + 21:0:VALIGN + 22:0:WIDTH + 4 attr_types + cellalign + core + i18n + TD + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO + 105:TEXTAREA + justify + 22 attributes: + 0:0:ACCEPT-CHARSET + 1:0:ACCESSKEY + 2:0:ALIGN + 3:4:CLASS + 4:0:CLEAR + 5:0:COLS + 6:0:DIR + 7:0:DISABLED + 8:0:ERROR + 9:1:ID + 10:0:LANG + 11:0:NAME + 12:0:NOTAB + 13:0:ONBLUR + 14:0:ONCHANGE + 15:0:ONFOCUS + 16:0:ONSELECT + 17:0:READONLY + 18:0:ROWS + 19:0:STYLE + 20:0:TABINDEX + 21:0:TITLE + 5 attr_types + align + core + events + i18n + TEXTAREA + contents: SGML_PCDATA + tagclass: SELECTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike outer BODYlike + canclose: FONTlike EMlike 
MATHlike Alike formula SELECTlike Plike LIlike ULlike same + flags: nolyspcl + 106:TEXTFLOW + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DATA + 3:0:DIR + 4:1:ID + 5:0:LANG + 6:0:NAME + 7:0:OBJECT + 8:0:REF + 9:0:STYLE + 10:0:TITLE + 11:0:TYPE + 12:0:VALUE + 13:0:VALUETYPE + 3 attr_types + core + i18n + BODYTEXT + contents: SGML_MIXED + tagclass: BODYlike + contains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike outer same + contained: formula TRlike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer same + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike BRlike APPLETlike MAPlike outer same + flags: endO startO + 107:TFOOT + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_ELEMENT + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 108:TH + justify + 23 attributes: + 0:0:ABBR + 1:0:ALIGN + 2:0:AXES + 3:0:AXIS + 4:2:BACKGROUND + 5:0:CHAR + 6:0:CHAROFF + 7:4:CLASS + 8:0:CLEAR + 9:0:COLSPAN + 10:0:DIR + 11:0:DP + 12:0:HEADERS + 13:0:HEIGHT + 14:1:ID + 15:0:LANG + 16:0:NOWRAP + 17:0:ROWSPAN + 18:0:SCOPE + 
19:0:STYLE + 20:0:TITLE + 21:0:VALIGN + 22:0:WIDTH + 4 attr_types + cellalign + core + i18n + TD + contents: SGML_MIXED + tagclass: LIlike + contains: FONTlike EMlike MATHlike Alike SELECTlike FORMlike Plike DIVlike ULlike BRlike APPLETlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike + contained: TRlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 109:THEAD + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + cellalign + core + events + i18n + TR + contents: SGML_ELEMENT + tagclass: TRlike + contains: TRlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike ULlike HRlike MAPlike same + flags: endO + 110:TITLE + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_PCDATA + tagclass: HEADstuff + contains: + icontains: + contained: outer HEADstuff + icontained: outer HEADstuff + canclose: FONTlike EMlike MATHlike Alike formula Plike DIVlike + flags: mafse strict + 111:TR + justify + 13 attributes: + 0:0:ALIGN + 1:0:CHAR + 2:0:CHAROFF + 3:4:CLASS + 4:0:CLEAR + 5:0:DIR + 6:0:DP + 7:1:ID + 8:0:LANG + 9:0:NOWRAP + 10:0:STYLE + 11:0:TITLE + 12:0:VALIGN + 5 attr_types + 
cellalign + core + events + i18n + TR + contents: SGML_MIXED + tagclass: TRlike + contains: LIlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: TRlike ULlike + icontained: FONTlike EMlike MATHlike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike HRlike MAPlike same + flags: endO + 112:TT + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: nreie + 113:U + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: FONTlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: mafse nreie + 
114:UL + justify + 14 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:COMPACT + 3:0:DINGBAT + 4:0:DIR + 5:1:ID + 6:0:LANG + 7:0:MD + 8:0:PLAIN + 9:2:SRC + 10:0:STYLE + 11:0:TITLE + 12:0:TYPE + 13:0:WRAP + 3 attr_types + core + i18n + UL + contents: SGML_MIXED + tagclass: ULlike + contains: FORMlike LIlike HRlike MAPlike + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FORMlike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike same + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike same + flags: + 115:VAR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_MIXED + tagclass: EMlike + contains: FONTlike EMlike MATHlike Alike SELECTlike BRlike APPLETlike MAPlike same + icontains: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike MAPlike same + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike BODYlike same + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff same + canclose: FONTlike + flags: + 116:WBR + justify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_EMPTY + tagclass: FONTlike + contains: + icontains: + contained: FONTlike EMlike MATHlike Alike FORMlike Plike DIVlike LIlike BRlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike FORMlike Plike DIVlike LIlike ULlike BRlike APPLETlike HRlike outer BODYlike HEADstuff + 
canclose: FONTlike EMlike MATHlike Alike formula BRlike same + flags: endO + 117:XMP + nojustify + 7 attributes: + 0:4:CLASS + 1:0:CLEAR + 2:0:DIR + 3:1:ID + 4:0:LANG + 5:0:STYLE + 6:0:TITLE + 4 attr_types + core + events + i18n + GEN + contents: SGML_LITTERAL + tagclass: ULlike + contains: + icontains: + contained: TRlike SELECTlike FORMlike Plike DIVlike LIlike APPLETlike HRlike outer BODYlike + icontained: FONTlike EMlike MATHlike Alike formula TRlike SELECTlike FORMlike Plike DIVlike LIlike ULlike APPLETlike HRlike outer BODYlike + canclose: FONTlike EMlike MATHlike Alike formula SELECTlike Plike DIVlike LIlike MAPlike + flags: endO diff --git a/WWW/Library/Implementation/tidy_tls.h b/WWW/Library/Implementation/tidy_tls.h new file mode 100644 index 00000000..25564404 --- /dev/null +++ b/WWW/Library/Implementation/tidy_tls.h @@ -0,0 +1,139 @@ +/* + * $LynxId: tidy_tls.h,v 1.3 2011/05/11 10:53:36 tom Exp $ + * Copyright 2008,2011 Thomas E. Dickey + */ +#ifndef TIDY_TLS_H +#define TIDY_TLS_H + +#include <gnutls/gnutls.h> + +#define OPENSSL_VERSION_NUMBER (0x0090604F) +#define SSLEAY_VERSION_NUMBER OPENSSL_VERSION_NUMBER + +#define SSLeay_add_ssl_algorithms() SSL_library_init() + +#define SSL_ST_OK (1) + +#define SSL_OP_ALL (0x000FFFFF) +#define SSL_OP_NO_SSLv2 (0x00100000) +#define SSL_OP_NO_SSLv3 (0x00200000) +#define SSL_OP_NO_TLSv1 (0x00400000) + +#define SSL_get_cipher_name(ssl) SSL_CIPHER_get_name(SSL_get_current_cipher(ssl)) +#define SSL_get_cipher(ssl) SSL_get_cipher_name(ssl) +#define SSL_get_cipher_bits(ssl,bp) SSL_CIPHER_get_bits(SSL_get_current_cipher(ssl),(bp)) +#define SSL_get_cipher_version(ssl) SSL_CIPHER_get_version(SSL_get_current_cipher(ssl)) + +#define TIDY_TLS_BUFSIZE 256 + +typedef struct { + char common_name[TIDY_TLS_BUFSIZE]; + char country[TIDY_TLS_BUFSIZE]; + char email[TIDY_TLS_BUFSIZE]; + char locality_name[TIDY_TLS_BUFSIZE]; + char organization[TIDY_TLS_BUFSIZE]; + char organizational_unit_name[TIDY_TLS_BUFSIZE]; + char 
state_or_province_name[TIDY_TLS_BUFSIZE]; +} X509_NAME; + +typedef struct _SSL SSL; + +typedef gnutls_datum_t X509; + +typedef struct { + gnutls_connection_end_t connend; + struct { + int protocol[GNUTLS_MAX_ALGORITHM_NUM]; + int encrypts[GNUTLS_MAX_ALGORITHM_NUM]; + int compress[GNUTLS_MAX_ALGORITHM_NUM]; + int key_xchg[GNUTLS_MAX_ALGORITHM_NUM]; + int msg_code[GNUTLS_MAX_ALGORITHM_NUM]; + } priority; +} SSL_METHOD; + +typedef struct { + SSL *ssl; + int error; + const gnutls_datum_t *cert_list; +#define current_cert cert_list +} X509_STORE_CTX; + +typedef struct { + gnutls_certificate_type_t cert; + gnutls_cipher_algorithm_t encrypts; + gnutls_compression_method_t compress; + gnutls_kx_algorithm_t key_xchg; + gnutls_mac_algorithm_t msg_code; + gnutls_protocol_t protocol; +} SSL_CIPHER; + +typedef struct _SSL_CTX { + SSL_METHOD *method; + char *certfile; + int certfile_type; + char *keyfile; + int keyfile_type; + unsigned long options; + + int (*verify_callback) (int, X509_STORE_CTX *); + int verify_mode; + +} SSL_CTX; + +struct _SSL { + gnutls_session_t gnutls_state; + + gnutls_certificate_client_credentials gnutls_cred; + + SSL_CTX *ctx; + SSL_CIPHER ciphersuite; + + int last_error; + int shutdown; + int state; + unsigned long options; + + int (*verify_callback) (int, X509_STORE_CTX *); + int verify_mode; + + gnutls_transport_ptr_t rfd; + gnutls_transport_ptr_t wfd; +}; + +/* use either SSL_VERIFY_NONE or SSL_VERIFY_PEER, the last 2 options + * are 'ored' with SSL_VERIFY_PEER if they are desired */ +#define SSL_VERIFY_PEER 0x01 +/* *INDENT-OFF* */ +extern SSL *SSL_new(SSL_CTX * ctx); +extern SSL_CIPHER *SSL_get_current_cipher(SSL * ssl); +extern SSL_CTX *SSL_CTX_new(SSL_METHOD * method); +extern SSL_METHOD *SSLv23_client_method(void); +extern const X509 *SSL_get_peer_certificate(SSL * ssl); +extern X509_NAME *X509_get_issuer_name(const X509 * cert); +extern X509_NAME *X509_get_subject_name(const X509 * cert); +extern char *X509_NAME_oneline(X509_NAME * name, char 
*buf, int len); +extern const char *ERR_error_string(unsigned long e, char *buf); +extern const char *RAND_file_name(char *buf, size_t len); +extern const char *SSL_CIPHER_get_name(SSL_CIPHER * cipher); +extern const char *SSL_CIPHER_get_version(SSL_CIPHER * cipher); +extern int RAND_bytes(unsigned char *buf, int num); +extern int RAND_load_file(const char *name, long maxbytes); +extern int RAND_status(void); +extern int RAND_write_file(const char *name); +extern int SSL_CIPHER_get_bits(SSL_CIPHER * cipher, int *bits); +extern int SSL_CTX_set_default_verify_paths(SSL_CTX * ctx); +extern int SSL_connect(SSL * ssl); +extern int SSL_library_init(void); +extern int SSL_read(SSL * ssl, void *buf, int len); +extern int SSL_set_fd(SSL * ssl, int fd); +extern int SSL_write(SSL * ssl, const void *buf, int len); +extern unsigned long ERR_get_error(void); +extern unsigned long SSL_CTX_set_options(SSL_CTX * ctx, unsigned long options); +extern void RAND_seed(const void *buf, int num); +extern void SSL_CTX_free(SSL_CTX * ctx); +extern void SSL_CTX_set_verify(SSL_CTX * ctx, int verify_mode, int (*verify_callback) (int, X509_STORE_CTX *)); +extern void SSL_free(SSL * ssl); +extern void SSL_load_error_strings(void); +/* *INDENT-ON* */ + +#endif /* TIDY_TLS_H */ diff --git a/WWW/Library/Implementation/www_tcp.h b/WWW/Library/Implementation/www_tcp.h new file mode 100644 index 00000000..63178adf --- /dev/null +++ b/WWW/Library/Implementation/www_tcp.h @@ -0,0 +1,983 @@ +/* System dependencies in the W3 library + * $LynxId: www_tcp.h,v 1.53 2011/05/24 23:48:50 tom Exp $ + * + SYSTEM DEPENDENCIES + + System-system differences for TCP include files and macros. This + file includes for each system the files necessary for network and + file I/O. It should be used in conjunction with HTUtils.h to help + ensure portability across as many platforms and flavors of platforms + as possible. 
+ + AUTHORS + + TBL Tim Berners-Lee, W3 project, CERN, <timbl@info.cern.ch> + EvA Eelco van Asperen <evas@cs.few.eur.nl> + MA Marc Andreessen NCSA + AT Aleksandar Totic <atotic@ncsa.uiuc.edu> + SCW Susan C. Weber <sweber@kyle.eitech.com> + + HISTORY: + 22 Feb 91 Written (TBL) as part of the WWW library. + 16 Jan 92 PC code from EvA + 22 Apr 93 Merged diffs bits from xmosaic release + 29 Apr 93 Windows/NT code from SCW + 20 May 94 A.Harper Add support for VMS CMU TCP/IP transport + 3 Oct 94 A.Harper Add support for VMS SOCKETSHR/NETLIB + 15 Jul 95 S. Bjorndahl Gnu C for VMS Globaldef/ref support + +*/ + +#ifndef TCP_H +#define TCP_H + +/* + +Default values + + These values may be reset and altered by system-specific sections + later on. There are also a bunch of defaults at the end. + + */ +/* Default values of those: */ + /* Routine to close a TCP-IP socket */ +#define NETCLOSE close + /* Routine to read from a TCP-IP socket */ +#define NETREAD(s,p,n) \ + HTDoRead(s, p, (unsigned)(n)) + /* Routine to write to a TCP-IP socket */ +#define NETWRITE(s,p,n) write(s, p, (size_t)(n)) +#define SOCKET_READ(s,b,l) read(s,b,(size_t)(l)) +#define IOCTL(s,cmd,arg) ioctl(s,(long)(cmd),arg) +#define SOCKET_ERRNO errno /* normal socket errno */ + +/* Unless stated otherwise, */ +#define SELECT /* Can handle >1 channel. 
*/ +#define GOT_SYSTEM /* Can call shell with string */ + +#ifdef UNIX +#define GOT_PIPE +#endif /* UNIX */ + +#define INVSOC (-1) /* Unix invalid socket */ + /* NB: newer libwww has something different for Windows */ + +#ifndef VMS + +#include <sys/types.h> + +#if defined(__BORLANDC__) +#define DECL_ERRNO +#endif + +#if defined(__DJGPP__) || defined(__BORLANDC__) +#undef HAVE_DIRENT_H +#define HAVE_DIRENT_H +#undef HAVE_SYS_FILIO_H +#endif /* DJGPP or __BORLANDC__ */ + +#if defined(_MSC_VER) +#undef HAVE_DIRENT_H +#define HAVE_DIRENT_H +#undef HAVE_SYS_FILIO_H +#endif /* _MSC_VER */ + +#ifdef HAVE_DIRENT_H +# include <dirent.h> +# define D_NAMLEN(dirent) strlen((dirent)->d_name) +# define STRUCT_DIRENT struct dirent +#else +# define D_NAMLEN(dirent) (dirent)->d_namlen +# define STRUCT_DIRENT struct direct +# ifdef HAVE_SYS_NDIR_H +# include <sys/ndir.h> +# endif +# ifdef HAVE_SYS_DIR_H +# include <sys/dir.h> +# endif +# ifdef HAVE_NDIR_H +# include <ndir.h> +# endif +#endif /* HAVE_DIRENT_H */ + +#ifdef HAVE_STRUCT_DIRENT64 +# undef STRUCT_DIRENT +# define STRUCT_DIRENT struct dirent64 +#endif + +#if !(defined(DOSPATH) || defined(__EMX__) || defined(__CYGWIN__)) +#define STRUCT_DIRENT__D_INO 1 +#endif + +#endif /* !VMS */ + +#ifdef TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# ifdef HAVE_SYS_TIME_H +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#if defined(_AIX) && !defined(AIX) +#define AIX +#endif /* _AIX */ + +#ifndef __CYGWIN__ +#ifdef WIN_EX +#define HAVE_FTIME 1 +#define HAVE_SYS_TIMEB_H 1 +#endif +#endif /* __CYGWIN__ */ + +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#else +#ifdef HAVE_SYS_FCNTL_H +#include <sys/fcntl.h> +#endif +#endif + +#ifdef HAVE_STRING_H +#include <string.h> /* For bzero etc */ +#endif /* HAVE_STRING_H */ + +/* + + MACROS FOR CONVERTING CHARACTERS + + */ +#ifndef TOASCII +#ifdef EBCDIC /* S/390 -- gil -- 1327 */ + +extern const char un_IBM1047[]; +extern const unsigned char IBM1047[]; + 
+/* For debugging +#include <assert.h> +#define TOASCII(c) (assert((c)>=0 && (c)<256), un_IBM1047[c]) +*/ +/* for production */ +#define TOASCII(c) (un_IBM1047[c]) + +#define FROMASCII(c) (IBM1047[c]) + +#else /* EBCDIC */ + +#if '0' != 48 +error Host character set is not ASCII. +#endif + +#define TOASCII(c) (c) +#define FROMASCII(c) (c) + +#endif /* EBCDIC */ +#endif /* !TOASCII */ + +/* convert a char to an unsigned, needed if we have signed characters for ctype.h */ +#define UCH(ch) ((unsigned char)(ch)) + +/* + * These parameters were provided by Nigel Horne, using BeOS professional 5.0 + */ +#ifdef __BEOS__ +#undef NETREAD +#undef NETWRITE +#undef NETCLOSE +#define NETREAD(s,b,l) recv((s),(b),(l),0) +#define NETWRITE(s,b,l) send((s),(b),(l),0) +#define NETCLOSE(s) closesocket(s) +#endif + +/* +IBM-PC running Windows NT + + These parameters provided by Susan C. Weber <sweber@kyle.eitech.com>. +*/ + +#ifdef _WINDOWS + +#ifndef _WINDOWS_NSL +#define _WINDOWS_NSL +#endif + +#include <fcntl.h> /* For HTFile.c */ +#include <sys/types.h> /* For HTFile.c */ +#include <sys/stat.h> /* For HTFile.c */ +#undef NETREAD +#undef NETWRITE +#undef NETCLOSE +#undef IOCTL +extern int ws_netread(int fd, char *buf, int len); + +#define NETREAD(s,b,l) ws_netread((s),(b),(l)) /* 1997/11/06 (Thu) */ +#define NETWRITE(s,b,l) send((s),(b),(l),0) +#define NETCLOSE(s) closesocket(s) +#define IOCTL(s,cmd,arg) ioctlsocket(s,cmd,arg) +#include <io.h> +#include <string.h> +#include <process.h> +#include <time.h> +#include <errno.h> +#include <direct.h> + +#ifdef ENABLE_IPV6 +#undef USE_WINSOCK2_H +#define USE_WINSOCK2_H + +/* Avoid including <winsock*.h> in <windows.h> */ +#ifndef WIN32_LEAN_AND_MEAN +#error Define "WIN32_LEAN_AND_MEAN" in your makefile +#endif + +#ifdef _WINSOCKAPI_ +#error windows.h included before winsock2.h +#endif + +#if defined(_MSC_VER) && (!defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0501) +/* + * Needed to pull in the real getaddrinfo() and not the inline version + * 
in <wspiAPI.H> which doesn't support IPv6 (IPv4 only). <wspiAPI.H> is + * included from <ws2tcpip.h> for <= 0x0500 SDKs. + */ +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0501 +#endif +#endif /* ENABLE_IPV6 */ + +#ifdef USE_WINSOCK2_H +#include <winsock2.h> /* normally included in windows.h */ + +#ifdef ENABLE_IPV6 +#include <ws2tcpip.h> +#endif + +#undef EINPROGRESS +#undef EALREADY +#undef EISCONN +#undef EINTR +#undef EAGAIN +#undef ENOTCONN +#undef ECONNRESET +#undef ETIMEDOUT + +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define EISCONN WSAEISCONN +#define EINTR WSAEINTR +/* fine EAGAIN WSAEAGAIN */ +#define ENOTCONN WSAENOTCONN +#define ECONNRESET WSAECONNRESET +#define ETIMEDOUT WSAETIMEDOUT + +#else /* USE_WINSOCK2_H */ + +#include <winsock.h> + +#if defined(_MSC_VER) || defined(__MINGW32__) +#undef EINTR +#undef EAGAIN +#endif /* _MSC_VER */ + +#define EINPROGRESS (WSABASEERR+36) +#define EALREADY (WSABASEERR+37) +#define EISCONN (WSABASEERR+56) +#define EINTR (WSABASEERR+4) +#define EAGAIN (WSABASEERR+1002) +#define ENOTCONN (WSABASEERR+57) +#define ECONNRESET (WSABASEERR+54) +#define ETIMEDOUT WSAETIMEDOUT + +#endif /* USE_WINSOCK2_H */ + +#undef SOCKET_ERRNO +#define SOCKET_ERRNO WSAGetLastError() + +#define INCLUDES_DONE +#define TCP_INCLUDES_DONE +#endif /* WINDOWS */ + +/* + +VAX/VMS + + Under VMS, there are many versions of TCP-IP. Define one if you do + not use Digital's UCX product: + + UCX DEC's "Ultrix connection" (default) + CMU_TCP Available via FTP from sacusr.mp.usbr.gov + SOCKETSHR Eckhart Meyer's interface to NETLIB + WIN_TCP From Wollongong, now GEC software. + MULTINET From SRI, became TGV, then Cisco. + DECNET Cern's TCP socket emulation over DECnet + TCPIP_SERVICES TCP/IP Services (newer than UCX) + + WIN_TCP, MULTINET and DECNET do not interfere with the + unix i/o library, and so they need special calls to read, write and + close sockets. 
In these cases the socket number is a VMS channel + number, so we make the @@@ HORRIBLE @@@ assumption that a channel + number will be greater than 10 but a unix file descriptor less than + 10. It works. + + */ +#ifdef VMS + +#ifdef UCX +#undef IOCTL +#define IOCTL(s,cmd,arg) HTioctl(s,cmd,arg) +#endif /* UCX */ + +#ifdef WIN_TCP +#undef SOCKET_READ +#undef NETWRITE +#undef NETCLOSE +#define SOCKET_READ(s,b,l) ((s)>10 ? netread((s),(b),(l)) : read((s),(b),(l))) +#define NETWRITE(s,b,l) ((s)>10 ? netwrite((s),(b),(l)) : write((s),(b),(l))) +#define NETCLOSE(s) ((s)>10 ? netclose(s) : close(s)) +#undef IOCTL +#define IOCTL(a,b,c) -1 /* disables ioctl function */ +#define NO_IOCTL /* flag to check if ioctl is disabled */ +#endif /* WIN_TCP */ + +#ifdef CMU_TCP +#undef SOCKET_READ +#undef NETREAD +#undef NETWRITE +#undef NETCLOSE +#define SOCKET_READ(s,b,l) (cmu_get_sdc((s)) != 0 ? cmu_read((s),(b),(l)) : read((s),(b),(l))) +#define NETREAD(s,b,l) (cmu_get_sdc((s)) != 0 ? HTDoRead((s),(b),(l)) : read((s),(b),(l))) +#define NETWRITE(s,b,l) (cmu_get_sdc((s)) != 0 ? cmu_write((s),(b),(l)) : write((s),(b),(l))) +#define NETCLOSE(s) (cmu_get_sdc((s)) != 0 ? cmu_close((s)) : close((s))) +#endif /* CMU_TCP */ + +#ifdef MULTINET +#undef NETCLOSE +#undef SOCKET_READ +#undef NETWRITE +#undef IOCTL +#undef SOCKET_ERRNO +/* + * Delete these socket_foo() prototypes as MultiNet adds them + * to it's socket library headers. Compiler warnings due to + * the absence of arguments in the generic prototypes here will + * include the names of those which can be deleted. - FM + */ +extern int socket_read(); +extern int socket_write(); +extern int socket_close(); +extern int socket_ioctl(); + +#define SOCKET_READ(s,b,l) ((s)>10 ? socket_read((s),(b),(l)) : \ + read((s),(b),(l))) +#define NETWRITE(s,b,l) ((s)>10 ? socket_write((s),(b),(l)) : \ + write((s),(b),(l))) +#define NETCLOSE(s) ((s)>10 ? 
socket_close(s) : close(s)) +#define IOCTL(s,cmd,arg) socket_ioctl(s,cmd,arg) +#define SOCKET_ERRNO socket_errno +#endif /* MULTINET */ + +#ifdef SOCKETSHR_TCP +#undef SOCKET_READ +#undef NETREAD +#undef NETWRITE +#undef NETCLOSE +#undef IOCTL +#define SOCKET_READ(s,b,l) (si_get_sdc((s)) != 0 ? si_read((s),(b),(l)) : \ + read((s),(b),(l))) +#define NETREAD(s,b,l) (si_get_sdc((s)) != 0 ? HTDoRead((s),(b),(l)) : \ + read((s),(b),(l))) +#define NETWRITE(s,b,l) (si_get_sdc((s)) != 0 ? si_write((s),(b),(l)) : \ + write((s),(b),(l))) +#define NETCLOSE(s) (si_get_sdc((s)) != 0 ? si_close((s)) : close((s))) +#define IOCTL(s,cmd,arg) si_ioctl(s,cmd,arg) +#endif /* SOCKETSHR_TCP */ + +#ifdef TCPIP_SERVICES +/* + * TCPIP Services has all of the entrypoints including ioctl(). + */ +#undef NETWRITE +#define NETWRITE(s,b,l) send((s),(char *)(b),(l),0) + +#define TYPE_FD_SET int + +#if 0 /* this should be declared via time.h */ +typedef TYPE_FD_SET fd_set; +#endif + +#endif /* TCPIP_SERVICES */ + +#include <string.h> + +#include <file.h> +#include <stat.h> +#include <unixio.h> +#include <unixlib.h> + +#define INCLUDES_DONE + +#ifdef MULTINET /* Include from standard Multinet directories */ +/* + * Delete any of these multinet_foo() and associated prototypes + * as MultiNet adds them to its socket library headers. You'll + * get compiler warnings about them, due the absence of arguments + * in the generic prototyping here, and the warnings will include + * the names of the functions whose prototype entries can be + * deleted here. 
- FM + */ +extern int multinet_accept(); +extern int multinet_bind(); +extern int multinet_connect(); +extern int multinet_gethostname(); +extern int multinet_getsockname(); +extern unsigned short multinet_htons(unsigned short __val); +extern unsigned short multinet_ntohs(unsigned short __val); +extern int multinet_listen(); +extern int multinet_select(); +extern int multinet_socket(); +extern char *vms_errno_string(); + +#ifndef __SOCKET_TYPEDEFS +#define __SOCKET_TYPEDEFS 1 +#endif /* !__SOCKET_TYPEDEFS */ + +#include <time.h> +#include <types.h> +/* + * DEC C before version 5.2 added some typedefs to <types.h> which happen + * to be suppressed if the version-4 compatibility define is set. In + * particular, lynx uses "off_t". VAX-C used "unsigned", DEC-C uses "int". + */ +#if defined(_DECC_V4_SOURCE) && !defined(____OFF_T) +#undef off_t +#define off_t int +#endif + +#ifdef __TIME_T +#undef __TYPES +#define __TYPES 1 +#define __TYPES_LOADED 1 +#endif /* __TIME_T */ + +#ifdef __SOCKET_TYPEDEFS +#undef __SOCKET_TYPEDEFS +#endif /* __SOCKET_TYPEDEFS */ + +#include "multinet_root:[multinet.include.sys]types.h" + +#ifndef __SOCKET_TYPEDEFS +#define __SOCKET_TYPEDEFS 1 +#endif /* !__SOCKET_TYPEDEFS */ + +#include "multinet_root:[multinet.include]errno.h" + +#ifdef __TYPES +#undef __TIME_T +#define __TIME_T 1 +#endif /* __TYPE */ + +#ifdef __TIME_LOADED +#undef __TIME +#define __TIME 1 /* to avoid double definitions in in.h */ +#endif /* __TIME_LOADED */ + +#include "multinet_root:[multinet.include.sys]time.h" + +#define MULTINET_NO_PROTOTYPES /* DECC is compatible-but-different */ +#include "multinet_root:[multinet.include.sys]socket.h" +#undef MULTINET_NO_PROTOTYPES +#include "multinet_root:[multinet.include.netinet]in.h" +#include "multinet_root:[multinet.include.arpa]inet.h" +#include "multinet_root:[multinet.include]netdb.h" +#include "multinet_root:[multinet.include.sys]ioctl.h" +#define TCP_INCLUDES_DONE +/* + * Uncomment this if you get compiler messages + * 
about struct timeval having no linkage. - FM + */ +/*#define NO_TIMEVAL*/ +#ifdef NO_TIMEVAL +struct timeval { + long tv_sec; /* seconds since Jan. 1, 1970 */ + long tv_usec; /* microseconds */ +}; +#endif /* NO_TIMEVAL */ +#endif /* MULTINET */ + +#ifdef DECNET +#include <types.h> +#include <errno.h> +#include <time.h> +#include <types.h> /* for socket.h */ +#include <socket.h> +#include <dn> +#include <dnetdb> +/* #include "vms.h" */ +#define TCP_INCLUDES_DONE +#endif /* DECNET */ + +#ifdef UCX +#include <types.h> +#include <errno.h> +#include <time.h> +#include <socket.h> +#include <in.h> +#include <inet.h> +#if defined(TCPWARE) && !defined(__DECC) +#include "tcpware_include:netdb.h" +#include "tcpware_include:ucx$inetdef.h" +#else +#include <netdb.h> +#ifdef MUCX +#include <multinet_root:[multinet.include.vms]ucx$inetdef.h> +#else +#include <ucx$inetdef.h> +#endif /* MUCX */ +#endif /* TCPWARE */ +#define TCP_INCLUDES_DONE +#endif /* UCX */ + +#ifdef CMU_TCP +#include <types.h> +#include <errno.h> +#include "cmuip_root:[syslib]time.h" +#include "cmuip_root:[syslib]socket.h" +#include <in.h> +#include <inet.h> +#include <netdb.h> +#include "cmuip_root:[syslib]ioctl.h" +#define TCP_INCLUDES_DONE +#endif /* CMU_TCP */ + +#ifdef SOCKETSHR_TCP +#include <types.h> +#include <errno.h> +#include <time.h> +#include <socket.h> +#include <in.h> +#include <inet.h> +#include <netdb.h> +#include "socketshr_library:socketshr.h" +#include "socketshr_library:ioctl.h" +#define TCP_INCLUDES_DONE +#endif /* SOCKETSHR_TCP */ + +#ifdef TCPIP_SERVICES +#include <types.h> +#include <errno.h> +#include <time.h> +#include <ioctl.h> +#include <socket.h> +#include <in.h> +#include <inet.h> +#include <netdb.h> +#define TCP_INCLUDES_DONE +#endif /* TCPIP_SERVICES */ + +#ifdef WIN_TCP +#include <types.h> +#include <errno.h> +#include <time.h> +#include <socket.h> +#include <in.h> +#include <inet.h> +#include <netdb.h> +#ifndef NO_IOCTL +#include <ioctl.h> +#endif /* !NO_IOCTL */ +#define 
TCP_INCLUDES_DONE +#endif /* WIN_TCP */ + +#ifndef TCP_INCLUDES_DONE +#include <types.h> +#include <errno.h> +#include <time.h> +#ifdef VMS_SOCKET_HEADERS +/* + * Not all versions of VMS have the full set of headers + * for socket library functions, because the TCP/IP + * packages were layered products. If we want these + * specifically, instead of those for the above packages, + * the module should be compiled with VMS_SOCKET_HEADERS + * defined instead of layered product definitions, above. + * If the module is not using socket library functions, + * none of the definitions need be used, and we include + * only the above three headers. - FM + */ +#include <socket.h> +#include <in.h> +#include <inet.h> +#include <netdb.h> +#include <ioctl.h> +#endif /* VMS_SOCKET_HEADERS */ +#define TCP_INCLUDES_DONE +#endif /* !TCP_INCLUDES_DONE */ + +/* + * On VMS machines, the linker needs to be told to put global data sections + * into a data segment using these storage classes. (MarkDonszelmann) + */ +#if defined(VAXC) && !defined(__DECC) +#define GLOBALDEF globaldef +#define GLOBALREF globalref +#else +#ifdef __GNUC__ /* this added by Sterling Bjorndahl */ +#define GLOBALREF_IS_MACRO 1 +#define GLOBALDEF_IS_MACRO 1 +#include <gnu_hacks.h> /* defines GLOBALREF and GLOBALDEF for GNUC on VMS */ +#endif /* __GNUC__ */ +#endif /* VAXC && !DECC */ + +#include <perror.h> +#ifndef errno +extern int errno; +#endif /* !errno */ + +#endif /* VMS */ + +/* + * On non-VMS machines and for DECC on VMS, the GLOBALDEF and GLOBALREF + * storage types default to normal C storage types. 
+ */ +#ifndef GLOBALREF +#define GLOBALDEF +#define GLOBALREF extern +#endif /* !GLOBALREF */ + +#ifdef __DJGPP__ +#undef SELECT +#define TCP_INCLUDES_DONE +#undef IOCTL +#define IOCTL(s,cmd,arg) ioctlsocket(s,cmd,(char*)(arg)) +#define DECL_ERRNO +#include <errno.h> +#include <sys/types.h> +#include <io.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <tcp.h> +#ifdef word +#undef word +#endif /* word */ +#ifdef set_timeout +#undef set_timeout +#endif /* set_timeout */ +#define select select_s + +#undef NETWRITE +#define NETWRITE write_s +#undef NETREAD +#define NETREAD read_s +#undef NETCLOSE +#define NETCLOSE close_s +#ifdef UNIX +#undef UNIX +#endif /* UNIX */ +#ifdef HAVE_GETTEXT +#define gettext gettext__ +#endif +#if !defined(NCURSES) && !defined(USE_SLANG) +#define HAVE_CBREAK 1 +#endif /* !NCURSES && !USE_SLANG */ +#if defined(USE_SLANG) && !defined(NO_DJ_KEYHANDLER) && defined(HAVE_CONFIG_H) +#define DJGPP_KEYHANDLER +#endif /* USE_SLANG && !NO_DJ_KEYHANDLER && HAVE_CONFIG_H */ +#endif /* DJGPP */ + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +#ifdef HAVE_SYS_FILIO_H +#include <sys/filio.h> +#endif /* HAVE_SYS_FILIO_H */ + +#if !defined(HAVE_LSTAT) && !defined(lstat) +#define lstat(path,block) stat(path,block) +#endif + +#if defined(DECL_ERRNO) && !defined(errno) +extern int errno; +#endif /* DECL_ERRNO */ + +/* +Regular BSD unix versions +========================= + These are a default unix where not already defined specifically. 
+ */ +#ifndef INCLUDES_DONE +#include <sys/types.h> +#ifdef HAVE_STRING_H +#include <string.h> +#endif /* HAVE_STRING_H */ +#include <errno.h> /* independent */ +#ifdef __MVS__ /* S/390 -- gil -- 1361 */ +#include <time.h> +#endif /* __MVS__ */ +#ifdef SCO +#include <sys/timeb.h> +#include <time.h> +#endif /* SCO */ +#if defined(AIX) || defined(SVR4) +#include <time.h> +#endif /* AIX || SVR4 */ +#include <sys/time.h> /* independent */ +#include <sys/stat.h> +#ifndef __MVS__ /* S/390 -- gil -- 1373 */ +#include <sys/param.h> +#endif /* __MVS__ */ +#include <sys/file.h> /* For open() etc */ + +#if defined(NeXT) || defined(sony_news) +#ifndef mode_t +typedef unsigned short mode_t; +#endif /* !mode_t */ + +#endif /* NeXT || sony_news */ + +#define INCLUDES_DONE +#endif /* Normal includes */ + +/* FIXME: this should be autoconf'd */ +/* Interactive UNIX for i386 and i486 -- Thanks to jeffrey@itm.itm.org */ +#ifdef ISC +#include <net/errno.h> +#include <sys/types.h> +#include <sys/tty.h> +#include <sys/sioctl.h> +#include <sys/bsdtypes.h> +#ifndef MERGE +#define MERGE +#include <sys/pty.h> +#undef MERGE +#else +#include <sys/pty.h> +#endif /* !MERGE */ +#ifndef USE_DIRENT +#define USE_DIRENT /* sys V style directory open */ +#endif /* USE_DIRENT */ +#include <sys/dirent.h> +#endif /* ISC */ + +/* Directory reading stuff - BSD or SYS V +*/ +#ifdef HAVE_CONFIG_H + +# ifdef HAVE_LIMITS_H +# include <limits.h> +# endif /* HAVE_LIMITS_H */ +# if !defined(MAXINT) && defined(INT_MAX) +# define MAXINT INT_MAX +# endif /* !MAXINT && INT_MAX */ + +#else + +#if !(defined(VM) || defined(VMS) || defined(THINK_C) || defined(PCNFS) || defined(_WINDOWS)) +#define DECL_SYS_ERRLIST 1 +#endif + +#if defined(VMS) +#define socklen_t unsigned +#else +#define socklen_t int /* used for default LY_SOCKLEN definition */ +#endif + +#endif /* !HAVE_CONFIG_H */ + +#ifdef HAVE_LIBINTL_H +#include <libintl.h> +#endif + +#ifdef HAVE_LIBGETTEXT_H +#include <libgettext.h> +#endif + +#define N_(s) s + 
+#ifndef HAVE_GETTEXT +#define gettext(s) s +#endif + +#ifndef NLS_TEXTDOMAIN +#define NLS_TEXTDOMAIN "lynx" +#endif + +/* +Defaults +======== + INCLUDE FILES FOR TCP + */ +#ifndef TCP_INCLUDES_DONE +#ifndef NO_IOCTL +#include <sys/ioctl.h> /* EJB */ +#endif /* !NO_IOCTL */ +#include <sys/socket.h> +#include <netinet/in.h> +#ifdef HAVE_ARPA_INET_H +#include <arpa/inet.h> /* Must be after netinet/in.h */ +#endif +#include <netdb.h> +#endif /* TCP includes */ + +typedef unsigned short PortNumber; + +#ifndef S_ISLNK +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#endif /* S_ISLNK */ + +#ifndef S_ISDIR +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif /* S_ISDIR */ + +#ifndef S_ISREG +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif /* S_ISREG */ + +#ifndef S_ISUID +#define S_ISUID 0004000 +#endif +#ifndef S_ISGID +#define S_ISGID 0002000 +#endif +#ifndef S_ISVTX +#define S_ISVTX 0001000 +#endif + +#ifndef S_IRWXU +#define S_IRWXU 00700 +#endif + +#ifndef S_IRUSR +#define S_IRUSR 00400 +#endif +#ifndef S_IWUSR +#define S_IWUSR 00200 +#endif +#ifndef S_IXUSR +#define S_IXUSR 00100 +#endif + +#ifndef S_IRWXG +#define S_IRWXG 00070 +#endif + +#ifndef S_IRGRP +#define S_IRGRP 00040 +#endif +#ifndef S_IWGRP +#define S_IWGRP 00020 +#endif +#ifndef S_IXGRP +#define S_IXGRP 00010 +#endif + +#ifndef S_IRWXO +#define S_IRWXO 00007 +#endif + +#ifndef S_IROTH +#define S_IROTH 00004 +#endif +#ifndef S_IWOTH +#define S_IWOTH 00002 +#endif +#ifndef S_IXOTH +#define S_IXOTH 00001 +#endif + +/* + +ROUGH ESTIMATE OF MAX PATH LENGTH + +*/ +#ifndef HT_MAX_PATH +#ifdef MAXPATHLEN +#define HT_MAX_PATH MAXPATHLEN +#else +#ifdef PATH_MAX +#define HT_MAX_PATH PATH_MAX +#else +#define HT_MAX_PATH 1024 /* Any better ideas? 
*/ +#endif +#endif +#endif /* HT_MAX_PATH */ + +#if HT_MAX_PATH < 256 +#undef HT_MAX_PATH +#define HT_MAX_PATH 256 +#endif + +/* + MACROS FOR MANIPULATING MASKS FOR SELECT() + */ +#ifdef SELECT +#ifndef FD_SET +#ifndef TYPE_FD_SET +#define TYPE_FD_SET unsigned +typedef TYPE_FD_SET fd_set; +#endif /* !TYPE_FD_SET */ + +#define FD_SET(fd,pmask) (*(pmask)) |= (1 << (fd)) +#define FD_CLR(fd,pmask) (*(pmask)) &= ~(1 << (fd)) +#define FD_ZERO(pmask) (*(pmask)) = 0 +#define FD_ISSET(fd,pmask) (*(pmask) & (1 << (fd))) +#endif /* !FD_SET */ +#endif /* SELECT */ + +/* + * Macro for setting errno - only define this if you really can do it. + */ +#if defined(CAN_SET_ERRNO) || (!defined(errno) && (!defined(VMS) || defined(UCX))) +#define set_errno(value) errno = value +#else +#define set_errno(value) /* we do not know how */ +#endif + +/* + * IPv6 support + */ +#if defined(HAVE_GETADDRINFO) && defined(ENABLE_IPV6) +#if defined(HAVE_GAI_STRERROR) +#define INET6 +#elif defined(_WINDOWS) +#define INET6 +#ifndef WIN_EX +#error Define "WIN_EX" in your makefile. +#endif +#ifndef _MSC_VER /* MSVC has this inlined in <ws2tcpip.h> */ +#undef gai_strerror +#define gai_strerror(err) w32_strerror (err) +#endif +#endif +#endif /* HAVE_GETADDRINFO && ENABLE_IPV6 */ + +#ifdef INET6 +typedef struct sockaddr_storage SockA; + +#ifdef SIN6_LEN +#define SOCKADDR_LEN(soc_address) (((struct sockaddr *)&soc_address)->sa_len) +#else +#ifndef SA_LEN +#define SA_LEN(x) (((x)->sa_family == AF_INET6) \ + ? sizeof(struct sockaddr_in6) \ + : (((x)->sa_family == AF_INET) \ + ? sizeof(struct sockaddr_in) \ + : sizeof(struct sockaddr))) /* AF_UNSPEC? */ +#endif +#define SOCKADDR_LEN(soc_address) (socklen_t) (SA_LEN((struct sockaddr *)&soc_address)) +#endif /* SIN6_LEN */ +#else +typedef struct sockaddr_in SockA; + +#define SOCKADDR_LEN(soc_address) sizeof(soc_address) +#endif /* INET6 */ + +#ifndef MAXHOSTNAMELEN +#define MAXHOSTNAMELEN 128 /* Max label is 63. 
Should handle 2 of those */ +#endif /* MAXHOSTNAMELEN */ + +#endif /* TCP_H */ diff --git a/WWW/Library/Implementation/www_wait.h b/WWW/Library/Implementation/www_wait.h new file mode 100644 index 00000000..3e40e889 --- /dev/null +++ b/WWW/Library/Implementation/www_wait.h @@ -0,0 +1,34 @@ +#ifndef WWW_WAIT_H +#define WWW_WAIT_H 1 + +#include <HTUtils.h> + +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> +#endif + +#ifndef WEXITSTATUS +# ifdef HAVE_TYPE_UNIONWAIT +# define WEXITSTATUS(status) (status.w_retcode) +# else +# define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +# endif +#endif + +#ifndef WTERMSIG +# ifdef HAVE_TYPE_UNIONWAIT +# define WTERMSIG(status) (status.w_termsig) +# else +# define WTERMSIG(status) ((status) & 0x7f) +# endif +#endif + +#ifndef WSTOPSIG +# ifdef HAVE_TYPE_UNIONWAIT +# define WSTOPSIG(status) (status.w_stopsig) +# else +# define WSTOPSIG(status) WEXITSTATUS(status) +# endif +#endif + +#endif /* WWW_WAIT_H */ |