about summary refs log blame commit diff stats
path: root/WWW/Library/Implementation/HTWSRC.c
blob: ba12cc713f05b8676c425d64c88a1217edb8fbcb (plain) (tree)
1
2
  
                                                         


























































































































































































                                                                              




                                                                                        
         













































































































                                                                           
                                       





































































































































































                                                                             













                                                                                
/*
 * $LynxId: HTWSRC.c,v 1.31 2020/01/21 22:05:46 tom Exp $
 *
 *			Parse WAIS Source file			HTWSRC.c
 *			======================
 *
 *	This module parses a stream with WAIS source file
 *	format information on it and creates a structured stream.
 *	That structured stream is then converted into whatever.
 *
 *	3 June 93	Bug fix: Won't crash if no description
 */

#define HTSTREAM_INTERNAL 1

#include <HTUtils.h>

#include <HTWSRC.h>
#include <LYUtils.h>

#include <HTML.h>
#include <HTParse.h>

#include <LYLeaks.h>

#define BIG 10000		/* Arbitrary limit to value length */
#define PARAM_MAX BIG
#define CACHE_PERIOD (7*86400)	/* Time to keep .src file in seconds */

struct _HTStructured {
    const HTStructuredClass *isa;
    /* ... */
};

#define PUTC(c) (*me->target->isa->put_character)(me->target, c)
#define PUTS(s) (*me->target->isa->put_string)(me->target, s)
#define START(e) (*me->target->isa->start_element)(me->target, e, 0, 0, -1, 0)
#define END(e) (*me->target->isa->end_element)(me->target, e, 0)
#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
			(*me->target->isa->end_element)(me->target, e, 0)

/*	Here are the parameters which can be specified in a  source file
*/
static const char *par_name[] =
{
    "version",
    "ip-address",
#define PAR_IP_NAME 2
    "ip-name",
#define PAR_TCP_PORT 3
    "tcp-port",
#define PAR_DATABASE_NAME 4
    "database-name",
#define PAR_COST 5
    "cost",
#define PAR_COST_UNIT 6
    "cost-unit",
#define PAR_FREE 7
    "free",
#define PAR_MAINTAINER 8
    "maintainer",
#define PAR_DESCRIPTION 9
    "description",
    "keyword-list",
    "source",
    "window-geometry",
    "configuration",
    "script",
    "update-time",
    "contact-at",
    "last-contacted",
    "confidence",
    "num-docs-to-request",
    "font",
    "font-size",
#define PAR_UNKNOWN 22
    "unknown",
    0,				/* Terminate list */
#define PAR_COUNT 23
};

enum tokenstate {
    beginning,
    before_tag,
    colon,
    before_value,
    value,
    bracketed_value,
    quoted_value,
    escape_in_quoted,
    done
};

/*		Stream Object
 *		------------
 *
 *	The target is the structured stream down which the
 *	parsed results will go.
 *
 *	all the static stuff below should go in here to make it reentrant
 */

struct _HTStream {
    const HTStreamClass *isa;
    HTStructured *target;
    char *par_value[PAR_COUNT];
    enum tokenstate state;
    char param[BIG + 1];
    int param_number;
    int param_count;
};

/*	Decode one hex character
*/
char from_hex(char c)
{
    return (char) ((c >= '0') && (c <= '9') ? c - '0'
		   : (c >= 'A') && (c <= 'F') ? c - 'A' + 10
		   : (c >= 'a') && (c <= 'f') ? c - 'a' + 10
		   : 0);
}

/*			State machine
 *			-------------
 *
 * On entry,
 *	me->state	is a valid state (see WSRC_init)
 *	c		is the next character
 * On exit,
 *	returns 1	Done with file
 *		0	Continue. me->state is updated if necessary.
 *		-1	Syntax error error
 */

/*		Treat One Character
 *		-------------------
 */
static void WSRCParser_put_character(HTStream *me, int c)
{
    switch (me->state) {
    case beginning:
	if (c == '(')
	    me->state = before_tag;
	break;

    case before_tag:
	if (c == ')') {
	    me->state = done;
	    return;		/* Done with input file */
	} else if (c == ':') {
	    me->param_count = 0;
	    me->state = colon;
	}			/* Ignore other text */
	break;

    case colon:
	if (WHITE(c)) {
	    me->param[me->param_count++] = 0;	/* Terminate */
	    for (me->param_number = 0;
		 par_name[me->param_number];
		 me->param_number++) {
		if (0 == strcmp(par_name[me->param_number], me->param)) {
		    break;
		}
	    }
	    if (!par_name[me->param_number]) {	/* Unknown field */
		CTRACE((tfp, "HTWSRC: Unknown field `%s' in source file\n",
			me->param));
		me->param_number = PAR_UNKNOWN;
		me->state = before_value;	/* Could be better ignore */
		return;
	    }
	    me->state = before_value;
	} else {
	    if (me->param_count < PARAM_MAX)
		me->param[me->param_count++] = (char) c;
	}
	break;

    case before_value:
	if (c == ')') {
	    me->state = done;
	    return;		/* Done with input file */
	}
	if (WHITE(c))
	    return;		/* Skip white space */
	me->param_count = 0;
	if (c == '"') {
	    me->state = quoted_value;
	} else {
	    me->state = ((c == '(')
			 ? bracketed_value
			 : value);
	    me->param[me->param_count++] = (char) c;	/* Don't miss first character */
	}
	break;

    case value:
	if (WHITE(c)) {
	    me->param[me->param_count] = 0;
	    StrAllocCopy(me->par_value[me->param_number], me->param);
	    me->state = before_tag;
	} else {
	    if (me->param_count < PARAM_MAX)
		me->param[me->param_count++] = (char) c;
	}
	break;

    case bracketed_value:
	if (c == ')') {
	    me->param[me->param_count] = 0;
	    StrAllocCopy(me->par_value[me->param_number], me->param);
	    me->state = before_tag;
	    break;
	}
	if (me->param_count < PARAM_MAX)
	    me->param[me->param_count++] = (char) c;
	break;

    case quoted_value:
	if (c == '"') {
	    me->param[me->param_count] = 0;
	    StrAllocCopy(me->par_value[me->param_number], me->param);
	    me->state = before_tag;
	    break;
	}

	if (c == '\\') {	/* Ignore escape but switch state */
	    me->state = escape_in_quoted;
	    break;
	}
	/* Fall through! */

    case escape_in_quoted:
	if (me->param_count < PARAM_MAX)
	    me->param[me->param_count++] = (char) c;
	me->state = quoted_value;
	break;

    case done:			/* Ignore anything after EOF */
	return;

    }				/* switch me->state */
}

/*			Open Cache file
 *			===============
 *
 *   Bugs: Maybe for filesystem-challenged platforms (MSDOS for example) we
 *   should make a hash code for the filename.
 */

#ifdef CACHE_FILE_PREFIX
static BOOL write_cache(HTStream *me)
{
    FILE *fp;
    char *cache_file_name = NULL;
    char *www_database;
    int result = NO;

    if (!me->par_value[PAR_DATABASE_NAME]
	|| !me->par_value[PAR_IP_NAME]
	)
	return NO;

    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME], URL_XALPHAS);
    HTSprintf0(&cache_file_name, "%sWSRC-%s:%s:%.100s.txt",
	       CACHE_FILE_PREFIX,
	       me->par_value[PAR_IP_NAME],
	       (me->par_value[PAR_TCP_PORT]
		? me->par_value[PAR_TCP_PORT]
		: "210"),
	       www_database);

    if ((fp = fopen(cache_file_name, TXT_W)) != 0) {
	result = YES;
	if (me->par_value[PAR_DESCRIPTION])
	    fputs(me->par_value[PAR_DESCRIPTION], fp);
	else
	    fputs("Description not available\n", fp);
	fclose(fp);
    }
    FREE(www_database);
    FREE(cache_file_name);
    return result;
}
#endif

/*			Output equivalent HTML
 *			----------------------
 *
 */

static void give_parameter(HTStream *me, int p)
{
    PUTS(par_name[p]);
    if (me->par_value[p]) {
	PUTS(": ");
	PUTS(me->par_value[p]);
	PUTS("; ");
    } else {
	PUTS(gettext(" NOT GIVEN in source file; "));
    }
}

/*			Generate Output
 *			===============
 */
static void WSRC_gen_html(HTStream *me, int source_file)
{
    if (me->par_value[PAR_DATABASE_NAME]) {
	char *shortname = 0;
	int l;

	StrAllocCopy(shortname, me->par_value[PAR_DATABASE_NAME]);
	l = (int) strlen(shortname);
	if (l > 4 && !strcasecomp(shortname + l - 4, ".src")) {
	    shortname[l - 4] = 0;	/* Chop of .src -- boring! */
	}

	START(HTML_HEAD);
	PUTC('\n');
	START(HTML_TITLE);
	PUTS(shortname);
	PUTS(source_file ? gettext(" WAIS source file") : INDEX_SEGMENT);
	END(HTML_TITLE);
	PUTC('\n');
	END(HTML_HEAD);

	START(HTML_H1);
	PUTS(shortname);
	PUTS(source_file ? gettext(" description") : INDEX_SEGMENT);
	END(HTML_H1);
	PUTC('\n');
	FREE(shortname);
    }

    START(HTML_DL);		/* Definition list of details */

    if (source_file) {
	START(HTML_DT);
	PUTS(gettext("Access links"));
	MAYBE_END(HTML_DT);
	START(HTML_DD);
	if (me->par_value[PAR_IP_NAME] &&
	    me->par_value[PAR_DATABASE_NAME]) {

	    char *WSRC_address = NULL;
	    char *www_database;

	    www_database = HTEscape(me->par_value[PAR_DATABASE_NAME],
				    URL_XALPHAS);
	    HTSprintf0(&WSRC_address, "%s//%s%s%s/%s",
		       STR_WAIS_URL,
		       me->par_value[PAR_IP_NAME],
		       me->par_value[PAR_TCP_PORT] ? ":" : "",
		       (me->par_value[PAR_TCP_PORT]
			? me->par_value[PAR_TCP_PORT]
			: ""),
		       www_database);

	    HTStartAnchor(me->target, NULL, WSRC_address);
	    PUTS(gettext("Direct access"));
	    END(HTML_A);
	    /** Proxy will be used if defined, so let user know that - FM **/
	    PUTS(gettext(" (or via proxy server, if defined)"));

	    FREE(www_database);
	    FREE(WSRC_address);

	} else {
	    give_parameter(me, PAR_IP_NAME);
	    give_parameter(me, PAR_DATABASE_NAME);
	}
	MAYBE_END(HTML_DD);

    }
    /* end if source_file */
    if (me->par_value[PAR_MAINTAINER]) {
	START(HTML_DT);
	PUTS(gettext("Maintainer"));
	MAYBE_END(HTML_DT);
	START(HTML_DD);
	PUTS(me->par_value[PAR_MAINTAINER]);
	MAYBE_END(HTML_DD);
    }
    if (me->par_value[PAR_IP_NAME]) {
	START(HTML_DT);
	PUTS(gettext("Host"));
	MAYBE_END(HTML_DT);
	START(HTML_DD);
	PUTS(me->par_value[PAR_IP_NAME]);
	MAYBE_END(HTML_DD);
    }

    END(HTML_DL);

    if (me->par_value[PAR_DESCRIPTION]) {
	START(HTML_PRE);	/* Preformatted description */
	PUTS(me->par_value[PAR_DESCRIPTION]);
	END(HTML_PRE);
    }

    (*me->target->isa->_free) (me->target);

    return;
}				/* generate html */

static void WSRCParser_put_string(HTStream *context, const char *str)
{
    const char *p;

    for (p = str; *p; p++)
	WSRCParser_put_character(context, *p);
}

static void WSRCParser_write(HTStream *context, const char *str,
			     int l)
{
    const char *p;
    const char *e = str + l;

    for (p = str; p < e; p++)
	WSRCParser_put_character(context, *p);
}

static void WSRCParser_free(HTStream *me)
{
    WSRC_gen_html(me, YES);
#ifdef CACHE_FILE_PREFIX
    write_cache(me);
#endif
    {
	int p;

	for (p = 0; par_name[p]; p++) {		/* Clear out old values */
	    FREE(me->par_value[p]);
	}
    }
    FREE(me);
}

static void WSRCParser_abort(HTStream *me, HTError e GCC_UNUSED)
{
    WSRCParser_free(me);
}

/*		Stream subclass		-- method routines
 *		---------------
 */

static HTStreamClass WSRCParserClass =
{
    "WSRCParser",
    WSRCParser_free,
    WSRCParser_abort,
    WSRCParser_put_character,
    WSRCParser_put_string,
    WSRCParser_write
};

/*		Converter from WAIS Source to whatever
 *		--------------------------------------
 */
HTStream *HTWSRCConvert(HTPresentation *pres, HTParentAnchor *anchor,
			HTStream *sink)
{
    HTStream *me = (HTStream *) malloc(sizeof(*me));

    if (!me)
	outofmem(__FILE__, "HTWSRCConvert");

    me->isa = &WSRCParserClass;
    me->target = HTML_new(anchor, pres->rep_out, sink);

    {
	int p;

	for (p = 0; p < PAR_COUNT; p++) {	/* Clear out parameter values */
	    me->par_value[p] = 0;
	}
    }
    me->state = beginning;

    return me;
}