From e087f6d44e87f489fcb3056e86319ebba4218156 Mon Sep 17 00:00:00 2001 From: "Thomas E. Dickey" Date: Mon, 2 Sep 1996 19:39:24 -0400 Subject: snapshot of project "lynx", label v2_6 --- WWW/Library/Implementation/HTFormat.c | 836 ++++++++++++++++++++++++++++++++++ 1 file changed, 836 insertions(+) create mode 100644 WWW/Library/Implementation/HTFormat.c (limited to 'WWW/Library/Implementation/HTFormat.c') diff --git a/WWW/Library/Implementation/HTFormat.c b/WWW/Library/Implementation/HTFormat.c new file mode 100644 index 00000000..f25d36b3 --- /dev/null +++ b/WWW/Library/Implementation/HTFormat.c @@ -0,0 +1,836 @@ + +/* Manage different file formats HTFormat.c +** ============================= +** +** Bugs: +** Not reentrant. +** +** Assumes the incoming stream is ASCII, rather than a local file +** format, and so ALWAYS converts from ASCII on non-ASCII machines. +** Therefore, non-ASCII machines can't read local files. +** +*/ + + +#include "HTUtils.h" +#include "tcp.h" + +/* Implements: +*/ +#include "HTFormat.h" + +PUBLIC float HTMaxSecs = 1e10; /* No effective limit */ +PUBLIC float HTMaxLength = 1e10; /* No effective limit */ +PUBLIC long int HTMaxBytes = 0; /* No effective limit */ + +PUBLIC int loading_length= -1; + +#ifdef unix +#ifdef NeXT +#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n" +#else +#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" + /* Full pathname would be better! */ +#endif +#endif + + +#include "HTML.h" +#include "HTMLDTD.h" +#include "HText.h" +#include "HTAlert.h" +#include "HTList.h" +#include "HTInit.h" +#include "HTTCP.h" +/* Streams and structured streams which we use: +*/ +#include "HTFWriter.h" +#include "HTPlain.h" +#include "SGML.h" +#include "HTML.h" +#include "HTMLGen.h" + +#include "LYexit.h" +#include "LYLeaks.h" + +#define FREE(x) if (x) {free(x); x = NULL;} + +extern int HTCheckForInterrupt NOPARAMS; + +PUBLIC BOOL HTOutputSource = NO; /* Flag: shortcut parser to stdout */ +/* extern BOOL interactive; LJM */ + +#ifdef ORIGINAL +struct _HTStream { + CONST HTStreamClass* isa; + /* ... */ +}; +#endif + +/* this version used by the NetToText stream */ +struct _HTStream { + CONST HTStreamClass * isa; + BOOL had_cr; + HTStream * sink; +}; + + +/* Presentation methods +** -------------------- +*/ + +PUBLIC HTList * HTPresentations = NULL; +PUBLIC HTPresentation * default_presentation = NULL; + +/* + * To free off the presentation list. + */ +PRIVATE void HTFreePresentations NOPARAMS; + +/* Define a presentation system command for a content-type +** ------------------------------------------------------- +*/ +PUBLIC void HTSetPresentation ARGS6( + CONST char *, representation, + CONST char *, command, + float, quality, + float, secs, + float, secs_per_byte, + long int, maxbytes) +{ + HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation)); + if (pres == NULL) + outofmem(__FILE__, "HTSetPresentation"); + + pres->rep = HTAtom_for(representation); + pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */ + pres->converter = HTSaveAndExecute; /* Fixed for now ... */ + pres->quality = quality; + pres->secs = secs; + pres->secs_per_byte = secs_per_byte; + pres->maxbytes = maxbytes; + pres->command = NULL; + StrAllocCopy(pres->command, command); + + /* + * Memory leak fixed. + * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + */ + if (!HTPresentations) { + HTPresentations = HTList_new(); + atexit(HTFreePresentations); + } + + if (strcmp(representation, "*")==0) { + FREE(default_presentation); + default_presentation = pres; + } else { + HTList_addObject(HTPresentations, pres); + } +} + + +/* Define a built-in function for a content-type +** --------------------------------------------- +*/ +PUBLIC void HTSetConversion ARGS7( + CONST char *, representation_in, + CONST char *, representation_out, + HTConverter*, converter, + float, quality, + float, secs, + float, secs_per_byte, + long int, maxbytes) +{ + HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation)); + if (pres == NULL) + outofmem(__FILE__, "HTSetConversion"); + + pres->rep = HTAtom_for(representation_in); + pres->rep_out = HTAtom_for(representation_out); + pres->converter = converter; + pres->command = NULL; /* Fixed */ + pres->quality = quality; + pres->secs = secs; + pres->secs_per_byte = secs_per_byte; + pres->maxbytes = maxbytes; + pres->command = NULL; + + /* + * Memory Leak fixed. + * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe + */ + if (!HTPresentations) { + HTPresentations = HTList_new(); + atexit(HTFreePresentations); + } + + HTList_addObject(HTPresentations, pres); +} + +/* +** Purpose: Free the presentation list. +** Arguments: void +** Return Value: void +** Remarks/Portability/Dependencies/Restrictions: +** Made to clean up Lynx's bad leakage. +** Revision History: +** 05-28-94 created Lynx 2-3-1 Garrett Arch Blythe +*/ +PRIVATE void HTFreePresentations NOARGS +{ + HTPresentation * pres = NULL; + + /* + * Loop through the list. + */ + while (!HTList_isEmpty(HTPresentations)) { + /* + * Free off each item. + * May also need to free off it's items, but not sure + * as of yet. + */ + pres = (HTPresentation *)HTList_removeLastObject(HTPresentations); + FREE(pres->command); + FREE(pres); + } + /* + * Free the list itself. + */ + HTList_delete(HTPresentations); + HTPresentations = NULL; +} + + +/* File buffering +** -------------- +** +** The input file is read using the macro which can read from +** a socket or a file. +** The input buffer size, if large will give greater efficiency and +** release the server faster, and if small will save space on PCs etc. +*/ +#define INPUT_BUFFER_SIZE 4096 /* Tradeoff */ +PRIVATE char input_buffer[INPUT_BUFFER_SIZE]; +PRIVATE char * input_pointer; +PRIVATE char * input_limit; +PRIVATE int input_file_number; + + +/* Set up the buffering +** +** These routines are public because they are in fact needed by +** many parsers, and on PCs and Macs we should not duplicate +** the static buffer area. +*/ +PUBLIC void HTInitInput ARGS1 (int,file_number) +{ + input_file_number = file_number; + input_pointer = input_limit = input_buffer; +} + +PUBLIC int interrupted_in_htgetcharacter = 0; +PUBLIC char HTGetCharacter NOARGS +{ + char ch; + interrupted_in_htgetcharacter = 0; + do { + if (input_pointer >= input_limit) { + int status = NETREAD( + input_file_number, input_buffer, INPUT_BUFFER_SIZE); + if (status <= 0) { + if (status == 0) return (char)EOF; + if (status == HT_INTERRUPTED) + { + if (TRACE) + fprintf (stderr, + "HTFormat: Interrupted in HTGetCharacter\n"); + interrupted_in_htgetcharacter = 1; + return (char)EOF; + } + if (TRACE) fprintf(stderr, + "HTFormat: File read error %d\n", status); + return (char)EOF; /* -1 is returned by UCX at end of HTTP link */ + } + input_pointer = input_buffer; + input_limit = input_buffer + status; + } + ch = *input_pointer++; + } while (ch == (char) 13); /* Ignore ASCII carriage return */ + + return FROMASCII(ch); +} + +/* Stream the data to an ouput file as binary +*/ +PUBLIC int HTOutputBinary ARGS2( int, input, + FILE *, output) +{ + do { + int status = NETREAD( + input, input_buffer, INPUT_BUFFER_SIZE); + if (status <= 0) { + if (status == 0) return 0; + if (TRACE) fprintf(stderr, + "HTFormat: File read error %d\n", status); + return 2; /* Error */ + } + fwrite(input_buffer, sizeof(char), status, output); + } while (YES); +} + +/* Match maintype to any MIME type starting with maintype, + * for example: image/gif should match image + */ +PRIVATE int half_match ARGS2(char *,trial_type, char *,target) +{ + char *cp=strchr(trial_type,'/'); + + /* if no '/' or no '*' */ + if(!cp || *(cp+1) != '*') + return 0; + + if(TRACE) + fprintf(stderr,"HTFormat: comparing %s and %s for half match\n", + trial_type, target); + + /* main type matches */ + if(!strncmp(trial_type, target, (cp-trial_type)-1)) + return 1; + + return 0; +} + + +/* Create a filter stack +** --------------------- +** +** If a wildcard match is made, a temporary HTPresentation +** structure is made to hold the destination format while the +** new stack is generated. This is just to pass the out format to +** MIME so far. Storing the format of a stream in the stream might +** be a lot neater. +** +*/ +PUBLIC HTStream * HTStreamStack ARGS4( + HTFormat, rep_in, + HTFormat, rep_out, + HTStream*, sink, + HTParentAnchor*, anchor) +{ + HTAtom * wildcard = HTAtom_for("*"); + + if (TRACE) fprintf(stderr, + "HTFormat: Constructing stream stack for %s to %s\n", + HTAtom_name(rep_in), + HTAtom_name(rep_out)); + + /* don't return on WWW_SOURCE some people might like + * to make use of the source!!!! LJM + */ + /* if (rep_out == WWW_SOURCE || + rep_out == rep_in) return sink; LJM */ + + if(rep_out == rep_in) return sink; + + /* don't do anymore do it in the Lynx code at startup LJM */ + /* if (!HTPresentations) HTFormatInit(); */ /* set up the list */ + + { + int n = HTList_count(HTPresentations); + int i; + HTPresentation * pres, *match, + *strong_wildcard_match=0, + *weak_wildcard_match=0, + *last_default_match=0, + *strong_subtype_wildcard_match=0; + + for(i=0; irep == rep_in) { + if (pres->rep_out == rep_out) { + if(TRACE) + fprintf(stderr,"StreamStack: found exact match: %s\n",HTAtom_name(pres->rep)); + return (*pres->converter)(pres, anchor, sink); + + } else if (pres->rep_out == wildcard) { + if(!strong_wildcard_match) + strong_wildcard_match = pres; + /* otherwise use the first one */ + if(TRACE) + fprintf(stderr,"StreamStack: found strong wildcard match: %s\n",HTAtom_name(pres->rep)); + } + + } else if(half_match(HTAtom_name(pres->rep), + HTAtom_name(rep_in))) { + + if (pres->rep_out == rep_out) { + if(!strong_subtype_wildcard_match) + strong_subtype_wildcard_match = pres; + /* otherwise use the first one */ + if(TRACE) + fprintf(stderr,"StreamStack: found strong subtype wildcard match: %s\n",HTAtom_name(pres->rep)); + } + } + + if (pres->rep == WWW_SOURCE) { + if(pres->rep_out == rep_out) { + if(!weak_wildcard_match) + weak_wildcard_match = pres; + /* otherwise use the first one */ + if(TRACE) + fprintf(stderr,"StreamStack: found weak wildcard match: %s\n",HTAtom_name(pres->rep_out)); + + } + if(pres->rep_out == wildcard) { + if(!last_default_match) + last_default_match = pres; + /* otherwise use the first one */ + } + } + } + + match = strong_subtype_wildcard_match ? strong_subtype_wildcard_match : + strong_wildcard_match ? strong_wildcard_match : + weak_wildcard_match ? weak_wildcard_match : + last_default_match; + + if (match) { + HTPresentation temp; + temp = *match; /* Specific instance */ + temp.rep = rep_in; /* yuk */ + temp.rep_out = rep_out; /* yuk */ + if(TRACE) + fprintf(stderr,"StreamStack: Using %s\n",HTAtom_name(temp.rep_out)); + return (*match->converter)(&temp, anchor, sink); + } + } + + return NULL; +} + + +/* Find the cost of a filter stack +** ------------------------------- +** +** Must return the cost of the same stack which StreamStack would set up. +** +** On entry, +** length The size of the data to be converted +*/ +PUBLIC float HTStackValue ARGS4( + HTFormat, rep_in, + HTFormat, rep_out, + float, initial_value, + long int, length) +{ + HTAtom * wildcard = HTAtom_for("*"); + + if (TRACE) fprintf(stderr, + "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n", + HTAtom_name(rep_in), initial_value, + HTAtom_name(rep_out)); + + if (rep_out == WWW_SOURCE || + rep_out == rep_in) return 0.0; + + /* don't do anymore do it in the Lynx code at startup LJM */ + /* if (!HTPresentations) HTFormatInit(); */ /* set up the list */ + + { + int n = HTList_count(HTPresentations); + int i; + HTPresentation * pres; + for(i=0; irep == rep_in && ( + pres->rep_out == rep_out || + pres->rep_out == wildcard)) { + float value = initial_value * pres->quality; + if (HTMaxSecs != 0.0) + value = value - (length*pres->secs_per_byte + pres->secs) + /HTMaxSecs; + return value; + } + } + } + + return -1e30; /* Really bad */ + +} + + +/* Push data from a socket down a stream +** ------------------------------------- +** +** This routine is responsible for creating and PRESENTING any +** graphic (or other) objects described by the file. +** +** The file number given is assumed to be a TELNET stream ie containing +** CRLF at the end of lines which need to be stripped to LF for unix +** when the format is textual. +** +*/ + +PUBLIC int HTCopy ARGS3( + int, file_number, + void*, handle, + HTStream*, sink) +{ + HTStreamClass targetClass; + char line[256]; + int bytes=0; + int rv = 0; + char * msg; + + if (loading_length == -1) + msg = "Read %d bytes of data."; + else + /* We have a loading_length. */ + msg = "Read %d of %d bytes of data."; + + +/* Push the data down the stream +** +*/ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* Push binary from socket down sink + ** + ** This operation could be put into a main event loop + */ + for(;;) { + int status; + extern char LYCancelDownload; + + if (LYCancelDownload) { + LYCancelDownload = FALSE; + (*targetClass._abort)(sink, NULL); + rv = -1; + goto finished; + } + + if (HTCheckForInterrupt()) + { + _HTProgress ("Data transfer interrupted."); + (*targetClass._abort)(sink, NULL); + if(bytes) + rv = HT_INTERRUPTED; + else + rv = -1; + goto finished; + } + + + status = NETREAD(file_number, input_buffer, INPUT_BUFFER_SIZE); + + if (status <= 0) { + if (status == 0) + break; + else if (status == HT_INTERRUPTED) + { + _HTProgress ("Data transfer interrupted."); + (*targetClass._abort)(sink, NULL); + if(bytes) + rv = HT_INTERRUPTED; + else + rv = -1; + goto finished; + } + else if (SOCKET_ERRNO == ENOTCONN || SOCKET_ERRNO == ECONNRESET + || SOCKET_ERRNO == EPIPE) + { + /* Arrrrgh, HTTP 0/1 compability problem, maybe. */ + rv = -2; + goto finished; + } + break; + } + +#ifdef NOT_ASCII + { + char * p; + for(p = input_buffer; p < input_buffer+status; p++) { + *p = FROMASCII(*p); + } + } +#endif + + (*targetClass.put_block)(sink, input_buffer, status); + + bytes += status; + sprintf(line, msg, bytes, loading_length); + HTProgress(line); + + } /* next bufferload */ + + _HTProgress("Data transfer complete"); + NETCLOSE(file_number); + rv = HT_LOADED; + +finished: + loading_length = -1; + return(rv); + +} + + + +/* Push data from a file pointer down a stream +** ------------------------------------- +** +** This routine is responsible for creating and PRESENTING any +** graphic (or other) objects described by the file. +** +** +*/ +PUBLIC void HTFileCopy ARGS2( + FILE *, fp, + HTStream*, sink) +{ + HTStreamClass targetClass; + +/* Push the data down the stream +** +*/ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + + /* Push binary from socket down sink + */ + for(;;) { + int status = fread( + input_buffer, 1, INPUT_BUFFER_SIZE, fp); + if (status == 0) { /* EOF or error */ + if (ferror(fp) == 0) break; + if (TRACE) fprintf(stderr, + "HTFormat: Read error, read returns %d\n", ferror(fp)); + break; + } + (*targetClass.put_block)(sink, input_buffer, status); + } /* next bufferload */ + +} + + + + +/* Push data from a socket down a stream STRIPPING CR +** -------------------------------------------------- +** +** This routine is responsible for creating and PRESENTING any +** graphic (or other) objects described by the socket. +** +** The file number given is assumed to be a TELNET stream ie containing +** CRLF at the end of lines which need to be stripped to LF for unix +** when the format is textual. +** +*/ +PUBLIC void HTCopyNoCR ARGS2( + int, file_number, + HTStream*, sink) +{ + HTStreamClass targetClass; + +/* Push the data, ignoring CRLF, down the stream +** +*/ + targetClass = *(sink->isa); /* Copy pointers to procedures */ + +/* Push text from telnet socket down sink +** +** @@@@@ To push strings could be faster? (especially is we +** cheat and don't ignore CR! :-} +*/ + HTInitInput(file_number); + for(;;) { + char character; + character = HTGetCharacter(); + if (character == (char)EOF) break; + (*targetClass.put_character)(sink, character); + } +} + + + +/* Parse a socket given format and file number +** +** This routine is responsible for creating and PRESENTING any +** graphic (or other) objects described by the file. +** +** The file number given is assumed to be a TELNET stream ie containing +** CRLF at the end of lines which need to be stripped to LF for unix +** when the format is textual. +** +*/ +PUBLIC int HTParseSocket ARGS5( + HTFormat, rep_in, + HTFormat, format_out, + HTParentAnchor *, anchor, + int, file_number, + HTStream*, sink) +{ + HTStream * stream; + HTStreamClass targetClass; + int rv; + extern char LYCancelDownload; + + stream = HTStreamStack(rep_in, + format_out, + sink , anchor); + + if (!stream) { + char buffer[1024]; /* @@@@@@@@ */ + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + sprintf(buffer, "Sorry, can't convert from %s to %s.", + HTAtom_name(rep_in), HTAtom_name(format_out)); + if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer); + return HTLoadError(sink, 501, buffer); /* returns -501 */ + } + +/* +** Push the data, don't worry about CRLF we can strip them later. +*/ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + rv = HTCopy(file_number, NULL, stream); + if (rv != -1 && rv != HT_INTERRUPTED) + (*targetClass._free)(stream); + + return rv; /* full: HT_LOADED; partial: HT_INTERRUPTED; no bytes: -1 */ +} + + + +/* Parse a file given format and file pointer +** +** This routine is responsible for creating and PRESENTING any +** graphic (or other) objects described by the file. +** +** The file number given is assumed to be a TELNET stream ie containing +** CRLF at the end of lines which need to be stripped to \n for unix +** when the format is textual. +** +*/ +PUBLIC int HTParseFile ARGS5( + HTFormat, rep_in, + HTFormat, format_out, + HTParentAnchor *, anchor, + FILE *, fp, + HTStream*, sink) +{ + HTStream * stream; + HTStreamClass targetClass; + + stream = HTStreamStack(rep_in, + format_out, + sink , anchor); + + if (!stream) { + char buffer[1024]; /* @@@@@@@@ */ + extern char LYCancelDownload; + if (LYCancelDownload) { + LYCancelDownload = FALSE; + return -1; + } + sprintf(buffer, "Sorry, can't convert from %s to %s.", + HTAtom_name(rep_in), HTAtom_name(format_out)); + if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer); + return HTLoadError(sink, 501, buffer); + } + +/* Push the data down the stream +** +** +** @@ Bug: This decision ought to be made based on "encoding" +** rather than on content-type. @@@ When we handle encoding. +** The current method smells anyway. +*/ + targetClass = *(stream->isa); /* Copy pointers to procedures */ + HTFileCopy(fp, stream); + (*targetClass._free)(stream); + + return HT_LOADED; +} + + +/* Converter stream: Network Telnet to internal character text +** ----------------------------------------------------------- +** +** The input is assumed to be in ASCII, with lines delimited +** by (13,10) pairs, These pairs are converted into (CR,LF) +** pairs in the local representation. The (CR,LF) sequence +** when found is changed to a '\n' character, the internal +** C representation of a new line. +*/ + + +PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char) +{ + char c = FROMASCII(net_char); + if (me->had_cr) { + if (c==LF) { + me->sink->isa->put_character(me->sink, '\n'); /* Newline */ + me->had_cr = NO; + return; + } else { + me->sink->isa->put_character(me->sink, CR); /* leftover */ + } + } + me->had_cr = (c==CR); + if (!me->had_cr) + me->sink->isa->put_character(me->sink, c); /* normal */ +} + +PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s) +{ + CONST char * p; + + for (p=s; *p; p++) + NetToText_put_character(me, *p); +} + +PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l) +{ + CONST char * p; + for(p=s; p<(s+l); p++) NetToText_put_character(me, *p); +} + +PRIVATE void NetToText_free ARGS1(HTStream *, me) +{ + (me->sink->isa->_free)(me->sink); /* Close rest of pipe */ + FREE(me); +} + +PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e) +{ + me->sink->isa->_abort(me->sink,e); /* Abort rest of pipe */ + FREE(me); +} + +/* The class structure +*/ +PRIVATE HTStreamClass NetToTextClass = { + "NetToText", + NetToText_free, + NetToText_abort, + NetToText_put_character, + NetToText_put_string, + NetToText_put_block +}; + +/* The creation method +*/ +PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink) +{ + HTStream* me = (HTStream*)malloc(sizeof(*me)); + if (me == NULL) + outofmem(__FILE__, "NetToText"); + me->isa = &NetToTextClass; + + me->had_cr = NO; + me->sink = sink; + return me; +} + -- cgit 1.4.1-2-gfad0