/*		Manage different file formats			HTFormat.c
**		=============================
**
** Bugs:
**	Not reentrant.
**
**	Assumes the incoming stream is ASCII, rather than a local file
**	format, and so ALWAYS converts from ASCII on non-ASCII machines.
**	Therefore, non-ASCII machines can't read local files.
**
*/

#include "HTUtils.h"
#include "tcp.h"
#include "HTAccess.h"

/* Implements: */
#include "HTFormat.h"

/*
**	Global limits.  HTMaxSecs is the divisor HTStackValue() applies to a
**	presentation's estimated conversion time; the other two are only
**	defined in this part of the file.
*/
PUBLIC float HTMaxSecs = 1e10;		/* No effective limit */
PUBLIC float HTMaxLength = 1e10;	/* No effective limit */
PUBLIC long int HTMaxBytes  = 0;	/* No effective limit */

/*
**	Command template for presenting PostScript on unix systems.
**	NOTE(review): not referenced in the visible part of this file.
*/
#ifdef unix
#ifdef NeXT
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n"
				/* Full pathname would be better! */
#endif /* NeXT */
#endif /* unix */

#include "HTML.h"
#include "HTMLDTD.h"
#include "HText.h"
#include "HTAlert.h"
#include "HTList.h"
#include "HTInit.h"
#include "HTTCP.h"

/* Streams and structured streams which we use: */
#include "HTFWriter.h"
#include "HTPlain.h"
#include "SGML.h"
#include "HTML.h"
#include "HTMLGen.h"

#include "LYexit.h"
#include "LYLeaks.h"

/*
**	Free x when it is non-NULL and reset it to NULL.
**	NOTE(review): expands to a bare `if` statement, so it must not be
**	used as the unbraced body of an if that has an else.
*/
#define FREE(x) if (x) {free(x); x = NULL;}

extern int HTCheckForInterrupt NOPARAMS;

PUBLIC	BOOL HTOutputSource = NO;	/* Flag: shortcut parser to stdout */
/* extern  BOOL interactive; LJM */

#ifdef ORIGINAL
struct _HTStream {
    CONST HTStreamClass*	isa;
    /* ... */
};
#endif /* ORIGINAL */

/* this version used by the NetToText stream */
struct _HTStream {
    CONST HTStreamClass *	isa;	/* method table of the stream class */
    BOOL			had_cr;	/* previous character was a CR that
					   has not been passed on yet */
    HTStream *			sink;	/* stream the output is written to */
};

/*	Presentation methods
**	--------------------
**
**	HTPresentations is the list of registered presentations and
**	converters; default_presentation holds the entry registered for the
**	representation "*".
*/
PUBLIC HTList * HTPresentations = NULL;
PUBLIC HTPresentation * default_presentation = NULL;

/*
 *	To free off the presentation list.
*/ PRIVATE void HTFreePresentations NOPARAMS; /* Define a presentation system command for a content-type ** ------------------------------------------------------- */ PUBLIC void HTSetPresentation ARGS6( CONST char *, representation, CONST char *, command, float, quality, float, secs, float, secs_per_byte, long int, maxbytes) { HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation)); if (pres == NULL) outofmem(__FILE__, "HTSetPresentation"); pres->rep = HTAtom_for(representation); pres->rep_out = WWW_PRESENT; /* Fixed for now ... :-) */ pres->converter = HTSaveAndExecute; /* Fixed for now ... */ pres->quality = quality; pres->secs = secs; pres->secs_per_byte = secs_per_byte; pres->maxbytes = maxbytes; pres->command = NULL; StrAllocCopy(pres->command, command); /* * Memory leak fixed. * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe */ if (!HTPresentations) { HTPresentations = HTList_new(); atexit(HTFreePresentations); } if (strcmp(representation, "*")==0) { FREE(default_presentation); default_presentation = pres; } else { HTList_addObject(HTPresentations, pres); } } /* Define a built-in function for a content-type ** --------------------------------------------- */ PUBLIC void HTSetConversion ARGS7( CONST char *, representation_in, CONST char *, representation_out, HTConverter*, converter, float, quality, float, secs, float, secs_per_byte, long int, maxbytes) { HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation)); if (pres == NULL) outofmem(__FILE__, "HTSetConversion"); pres->rep = HTAtom_for(representation_in); pres->rep_out = HTAtom_for(representation_out); pres->converter = converter; pres->command = NULL; /* Fixed */ pres->quality = quality; pres->secs = secs; pres->secs_per_byte = secs_per_byte; pres->maxbytes = maxbytes; pres->command = NULL; /* * Memory Leak fixed. 
* 05-28-94 Lynx 2-3-1 Garrett Arch Blythe */ if (!HTPresentations) { HTPresentations = HTList_new(); atexit(HTFreePresentations); } HTList_addObject(HTPresentations, pres); } /* ** Purpose: Free the presentation list. ** Arguments: void ** Return Value: void ** Remarks/Portability/Dependencies/Restrictions: ** Made to clean up Lynx's bad leakage. ** Revision History: ** 05-28-94 created Lynx 2-3-1 Garrett Arch Blythe */ PRIVATE void HTFreePresentations NOARGS { HTPresentation * pres = NULL; /* * Loop through the list. */ while (!HTList_isEmpty(HTPresentations)) { /* * Free off each item. * May also need to free off it's items, but not sure * as of yet. */ pres = (HTPresentation *)HTList_removeLastObject(HTPresentations); FREE(pres->command); FREE(pres); } /* * Free the list itself. */ HTList_delete(HTPresentations); HTPresentations = NULL; } /* File buffering ** -------------- ** ** The input file is read using the macro which can read from ** a socket or a file. ** The input buffer size, if large will give greater efficiency and ** release the server faster, and if small will save space on PCs etc. */ #define INPUT_BUFFER_SIZE 4096 /* Tradeoff */ PRIVATE char input_buffer[INPUT_BUFFER_SIZE]; PRIVATE char * input_pointer; PRIVATE char * input_limit; PRIVATE int input_file_number; /* Set up the buffering ** ** These routines are public because they are in fact needed by ** many parsers, and on PCs and Macs we should not duplicate ** the static buffer area. 
*/ PUBLIC void HTInitInput ARGS1 (int,file_number) { input_file_number = file_number; input_pointer = input_limit = input_buffer; } PUBLIC int interrupted_in_htgetcharacter = 0; PUBLIC char HTGetCharacter NOARGS { char ch; interrupted_in_htgetcharacter = 0; do { if (input_pointer >= input_limit) { int status = NETREAD(input_file_number, input_buffer, INPUT_BUFFER_SIZE); if (status <= 0) { if (status == 0) return (char)EOF; if (status == HT_INTERRUPTED) { if (TRACE) fprintf(stderr, "HTFormat: Interrupted in HTGetCharacter\n"); interrupted_in_htgetcharacter = 1; return (char)EOF; } if (TRACE) fprintf(stderr, "HTFormat: File read error %d\n", status); return (char)EOF; /* -1 is returned by UCX at end of HTTP link */ } input_pointer = input_buffer; input_limit = input_buffer + status; } ch = *input_pointer++; } while (ch == (char) 13); /* Ignore ASCII carriage return */ return FROMASCII(ch); } /* Stream the data to an ouput file as binary */ PUBLIC int HTOutputBinary ARGS2( int, input, FILE *, output) { do { int status = NETREAD(input, input_buffer, INPUT_BUFFER_SIZE); if (status <= 0) { if (status == 0) return 0; if (TRACE) fprintf(stderr, "HTFormat: File read error %d\n", status); return 2; /* Error */ } fwrite(input_buffer, sizeof(char), status, output); } while (YES); } /* Match maintype to any MIME type starting with maintype, * for example: image/gif should match image */ PRIVATE int half_match ARGS2(char *,trial_type, char *,target) { char *cp=strchr(trial_type,'/'); /* if no '/' or no '*' */ if (!cp || *(cp+1) != '*') return 0; if (TRACE) fprintf(stderr,"HTFormat: comparing %s and %s for half match\n", trial_type, target); /* main type matches */ if (!strncmp(trial_type, target, (cp-trial_type)-1)) return 1; return 0; } /* Create a filter stack ** --------------------- ** ** If a wildcard match is made, a temporary HTPresentation ** structure is made to hold the destination format while the ** new stack is generated. 
This is just to pass the out format to ** MIME so far. Storing the format of a stream in the stream might ** be a lot neater. ** */ PUBLIC HTStream * HTStreamStack ARGS4( HTFormat, rep_in, HTFormat, rep_out, HTStream*, sink, HTParentAnchor*, anchor) { HTAtom * wildcard = HTAtom_for("*"); if (TRACE) fprintf(stderr, "HTFormat: Constructing stream stack for %s to %s\n", HTAtom_name(rep_in), HTAtom_name(rep_out)); /* don't return on WWW_SOURCE some people might like * to make use of the source!!!! LJM *//* if (rep_out == WWW_SOURCE || rep_out == rep_in) return sink; LJM */ if (rep_out == rep_in) return sink; /* don't do anymore do it in the Lynx code at startup LJM */ /* if (!HTPresentations) HTFormatInit(); */ /* set up the list */ { int n = HTList_count(HTPresentations); int i; HTPresentation * pres, *match, *strong_wildcard_match=0, *weak_wildcard_match=0, *last_default_match=0, *strong_subtype_wildcard_match=0; for (i = 0; i < n; i++) { pres = (HTPresentation *)HTList_objectAt(HTPresentations, i); if (pres->rep == rep_in) { if (pres->rep_out == rep_out) { if (TRACE) fprintf(stderr, "StreamStack: found exact match: %s\n", HTAtom_name(pres->rep)); return (*pres->converter)(pres, anchor, sink); } else if (pres->rep_out == wildcard) { if (!strong_wildcard_match) strong_wildcard_match = pres; /* otherwise use the first one */ if (TRACE) fprintf(stderr, "StreamStack: found strong wildcard match: %s\n", HTAtom_name(pres->rep)); } } else if (half_match(HTAtom_name(pres->rep), HTAtom_name(rep_in))) { if (pres->rep_out == rep_out) { if (!strong_subtype_wildcard_match) strong_subtype_wildcard_match = pres; /* otherwise use the first one */ if (TRACE) fprintf(stderr, "StreamStack: found strong subtype wildcard match: %s\n", HTAtom_name(pres->rep)); } } if (pres->rep == WWW_SOURCE) { if (pres->rep_out == rep_out) { if (!weak_wildcard_match) weak_wildcard_match = pres; /* otherwise use the first one */ if (TRACE) fprintf(stderr, "StreamStack: found weak wildcard match: %s\n", 
HTAtom_name(pres->rep_out)); } if (pres->rep_out == wildcard) { if (!last_default_match) last_default_match = pres; /* otherwise use the first one */ } } } match = strong_subtype_wildcard_match ? strong_subtype_wildcard_match : strong_wildcard_match ? strong_wildcard_match : weak_wildcard_match ? weak_wildcard_match : last_default_match; if (match) { HTPresentation temp; temp = *match; /* Specific instance */ temp.rep = rep_in; /* yuk */ temp.rep_out = rep_out; /* yuk */ if (TRACE) fprintf(stderr, "StreamStack: Using %s\n", HTAtom_name(temp.rep_out)); return (*match->converter)(&temp, anchor, sink); } } return NULL; } /* Find the cost of a filter stack ** ------------------------------- ** ** Must return the cost of the same stack which StreamStack would set up. ** ** On entry, ** length The size of the data to be converted */ PUBLIC float HTStackValue ARGS4( HTFormat, rep_in, HTFormat, rep_out, float, initial_value, long int, length) { HTAtom * wildcard = HTAtom_for("*"); if (TRACE) fprintf(stderr, "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n", HTAtom_name(rep_in), initial_value, HTAtom_name(rep_out)); if (rep_out == WWW_SOURCE || rep_out == rep_in) return 0.0; /* don't do anymore do it in the Lynx code at startup LJM */ /* if (!HTPresentations) HTFormatInit(); */ /* set up the list */ { int n = HTList_count(HTPresentations); int i; HTPresentation * pres; for (i = 0; i < n; i++) { pres = (HTPresentation *)HTList_objectAt(HTPresentations, i); if (pres->rep == rep_in && (pres->rep_out == rep_out || pres->rep_out == wildcard)) { float value = initial_value * pres->quality; if (HTMaxSecs != 0.0) value = value - (length*pres->secs_per_byte + pres->secs) /HTMaxSecs; return value; } } } return -1e30; /* Really bad */ } /* Push data from a socket down a stream ** ------------------------------------- ** ** This routine is responsible for creating and PRESENTING any ** graphic (or other) objects described by the file. 
**
**   The file number given is assumed to be a TELNET stream ie containing
**   CRLF at the end of lines which need to be stripped to LF for unix
**   when the format is textual.
**
** On entry,
**	anchor		may be NULL; only its content_length is read, for
**			the progress message
**	file_number	descriptor handed to NETREAD
**	handle		not used by this function
**	sink		stream that receives each buffer via put_block
**
** On exit,
**	returns		HT_LOADED	input ended (or the read failed in a
**					way not singled out below); the
**					descriptor has been passed to NETCLOSE
**			HT_INTERRUPTED	interrupted after some bytes were
**					read; sink has been aborted
**			-1		cancelled through LYCancelDownload,
**					or interrupted before any byte was
**					read; sink has been aborted
**			-2		ENOTCONN, ECONNRESET or EPIPE before
**					any byte was read; sink is left
**					untouched for the caller
*/
PUBLIC int HTCopy ARGS4(
	HTParentAnchor *,	anchor,
	int,			file_number,
	void*,			handle,
	HTStream*,		sink)
{
    HTStreamClass targetClass;
    char line[256];		/* progress message text */
    int bytes = 0;		/* total number of bytes pushed so far */
    int rv = 0;

    /*	Push the data down the stream
    */
    targetClass = *(sink->isa); /* Copy pointers to procedures */

    /*	Push binary from socket down sink
    **
    **	This operation could be put into a main event loop
    */
    for (;;) {
	int status;
	extern char LYCancelDownload;

	/* A pending cancel request is consumed and ends the transfer. */
	if (LYCancelDownload) {
	    LYCancelDownload = FALSE;
	    (*targetClass._abort)(sink, NULL);
	    rv = -1;
	    goto finished;
	}

	if (HTCheckForInterrupt()) {
	    _HTProgress ("Data transfer interrupted.");
	    (*targetClass._abort)(sink, NULL);
	    if (bytes)
		rv = HT_INTERRUPTED;
	    else
		rv = -1;
	    goto finished;
	}

	status = NETREAD(file_number, input_buffer, INPUT_BUFFER_SIZE);

	if (status <= 0) {
	    if (status == 0) {
		break;		/* normal end of input */
	    } else if (status == HT_INTERRUPTED) {
		_HTProgress ("Data transfer interrupted.");
		(*targetClass._abort)(sink, NULL);
		if (bytes)
		    rv = HT_INTERRUPTED;
		else
		    rv = -1;
		goto finished;
	    } else if (SOCKET_ERRNO == ENOTCONN ||
		       SOCKET_ERRNO == ECONNRESET ||
		       SOCKET_ERRNO == EPIPE) {
		/*
		 *  Arrrrgh, HTTP 0/1 compability problem, maybe.
		 */
		if (bytes <= 0) {
		    /*
		     *	Don't have any data, so let the calling
		     *	function decide what to do about it. - FM
		     */
		    rv = -2;
		    goto finished;
		} else {
		    /*
		     *	Treat what we've gotten already
		     *	as the complete transmission. - FM
		     */
		    if (TRACE)
			fprintf(stderr,
	    "HTCopy: Unexpected server disconnect. Treating as completed.\n");
		    status = 0;
		    break;
		}
	    }
	    /* Any other read failure also ends the loop as if complete. */
	    break;
	}

#ifdef NOT_ASCII
	/* Convert the buffer in place to the local character set. */
	{
	    char * p;
	    for (p = input_buffer; p < input_buffer+status; p++) {
		*p = FROMASCII(*p);
	    }
	}
#endif /* NOT_ASCII */

	(*targetClass.put_block)(sink, input_buffer, status);

	bytes += status;
	if (anchor && anchor->content_length > 0)
	    sprintf(line, "Read %d of %d bytes of data.",
			  bytes, anchor->content_length);
	else
	    sprintf(line, "Read %d bytes of data.", bytes);
	HTProgress(line);

    } /* next bufferload */

    _HTProgress("Data transfer complete");
    (void)NETCLOSE(file_number);
    rv = HT_LOADED;

finished:
    return(rv);
}

/*	Push data from a file pointer down a stream
**	-------------------------------------
**
**   This routine is responsible for creating and PRESENTING any
**   graphic (or other) objects described by the file.
**
** On entry,
**	fp	open file that is read with fread
**	sink	stream that receives each buffer via put_block; it is
**		neither freed nor aborted here
**
** On exit,
**	returns		HT_LOADED		fread returned 0 without
**						ferror being set
**			HT_PARTIAL_CONTENT	read error after some bytes
**			HT_INTERRUPTED		interrupted after some bytes
**			-1			read error or interrupt with
**						no bytes read
*/
PUBLIC int HTFileCopy ARGS2(
	FILE *,			fp,
	HTStream*,		sink)
{
    HTStreamClass targetClass;
    char line[256];		/* progress message text */
    int status, bytes = 0, nreads = 0, nprogr = 0;
    int rv = HT_OK;

    /*	Push the data down the stream
    */
    targetClass = *(sink->isa); /* Copy pointers to procedures */

    /*	Push binary from socket down sink
    */
    for (;;) {
	status = fread(input_buffer, 1, INPUT_BUFFER_SIZE, fp);
	nreads++;
	if (status == 0) { /* EOF or error */
	    if (ferror(fp) == 0) {
		rv = HT_LOADED;
		break;
	    }
	    if (TRACE)
		fprintf(stderr,
			"HTFormat: Read error, read returns %d\n",
			ferror(fp));
	    if (bytes) {
		rv = HT_PARTIAL_CONTENT;
	    } else {
		rv = -1;
	    }
	    break;
	}
	(*targetClass.put_block)(sink, input_buffer, status);
	bytes += status;
	if (nreads >= 100) {
	    /*
	    **	Show progress messages for local files, and check for
	    **	user interruption.  Start doing so only after a certain
	    **	number of reads have been done, and don't update it on
	    **	every read (normally reading in a local file should be
	    **	speedy). - KW
	    **
	    **	nprogr cycles through 0..25; the message and the interrupt
	    **	check happen only on the pass where it is 0.
	    */
	    if (nprogr == 0) {
		if (bytes < 1024000) {
		    sprintf(line, "Read %d bytes of data.", bytes);
		} else {
		    sprintf(line, "Read %d KB of data. %s",
				  bytes/1024,
				  "(Press 'z' if you want to abort loading.)");
		}
		HTProgress(line);
		if (HTCheckForInterrupt()) {
		    _HTProgress ("Data transfer interrupted.");
		    if (bytes) {
			rv = HT_INTERRUPTED;
		    } else {
			rv = -1;
		    }
		    break;
		}
		nprogr++;
	    } else if (nprogr == 25) {
		nprogr = 0;
	    } else {
		nprogr++;
	    }
	}
    } /* next bufferload */

    return rv;
}

/*	Push data from a socket down a stream STRIPPING CR
**	--------------------------------------------------
**
**   This routine is responsible for creating and PRESENTING any
**   graphic (or other) objects described by the socket.
**
**   The file number given is assumed to be a TELNET stream ie containing
**   CRLF at the end of lines which need to be stripped to LF for unix
**   when the format is textual.
**
** On entry,
**	anchor		not used by this function
**	file_number	descriptor passed to HTInitInput
**	sink		stream that receives each character
**
**   Characters are fetched with HTGetCharacter, which already drops
**   carriage returns, until it returns (char)EOF.
**   NOTE(review): where EOF is -1, a data byte equal to (char)EOF cannot be
**   told apart from the end of the input.
*/
PUBLIC void HTCopyNoCR ARGS3(
	HTParentAnchor *,	anchor,
	int,			file_number,
	HTStream*,		sink)
{
    HTStreamClass targetClass;
    char character;

    /*	Push the data, ignoring CRLF, down the stream
    */
    targetClass = *(sink->isa); /* Copy pointers to procedures */

    /*	Push text from telnet socket down sink
    **
    **	@@@@@ To push strings could be faster? (especially is we
    **	cheat and don't ignore CR! :-}
    */
    HTInitInput(file_number);
    for (;;) {
	character = HTGetCharacter();
	if (character == (char)EOF)
	    break;
	(*targetClass.put_character)(sink, character);
    }
}

/*	Parse a socket given format and file number
**
**   This routine is responsible for creating and PRESENTING any
**   graphic (or other) objects described by the file.
**
**   The file number given is assumed to be a TELNET stream ie containing
**   CRLF at the end of lines which need to be stripped to LF for unix
**   when the format is textual.
** */ PUBLIC int HTParseSocket ARGS5( HTFormat, rep_in, HTFormat, format_out, HTParentAnchor *, anchor, int, file_number, HTStream*, sink) { HTStream * stream; HTStreamClass targetClass; int rv; extern char LYCancelDownload; stream = HTStreamStack(rep_in, format_out, sink, anchor); if (!stream) { char buffer[1024]; /* @@@@@@@@ */ if (LYCancelDownload) { LYCancelDownload = FALSE; return -1; } sprintf(buffer, "Sorry, can't convert from %s to %s.", HTAtom_name(rep_in), HTAtom_name(format_out)); if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer); return HTLoadError(sink, 501, buffer); /* returns -501 */ } /* ** Push the data, don't worry about CRLF we can strip them later. */ targetClass = *(stream->isa); /* Copy pointers to procedures */ rv = HTCopy(anchor, file_number, NULL, stream); if (rv != -1 && rv != HT_INTERRUPTED) (*targetClass._free)(stream); return rv; /* full: HT_LOADED; partial: HT_INTERRUPTED; no bytes: -1 */ } /* Parse a file given format and file pointer ** ** This routine is responsible for creating and PRESENTING any ** graphic (or other) objects described by the file. ** ** The file number given is assumed to be a TELNET stream ie containing ** CRLF at the end of lines which need to be stripped to \n for unix ** when the format is textual. 
** */ PUBLIC int HTParseFile ARGS5( HTFormat, rep_in, HTFormat, format_out, HTParentAnchor *, anchor, FILE *, fp, HTStream*, sink) { HTStream * stream; HTStreamClass targetClass; int rv; stream = HTStreamStack(rep_in, format_out, sink , anchor); if (!stream) { char buffer[1024]; /* @@@@@@@@ */ extern char LYCancelDownload; if (LYCancelDownload) { LYCancelDownload = FALSE; return -1; } sprintf(buffer, "Sorry, can't convert from %s to %s.", HTAtom_name(rep_in), HTAtom_name(format_out)); if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer); return HTLoadError(sink, 501, buffer); } /* Push the data down the stream ** ** @@ Bug: This decision ought to be made based on "encoding" ** rather than on content-type. @@@ When we handle encoding. ** The current method smells anyway. */ targetClass = *(stream->isa); /* Copy pointers to procedures */ rv = HTFileCopy(fp, stream); if (rv == -1 || rv == HT_INTERRUPTED) { (*targetClass._abort)(stream, NULL); } else { (*targetClass._free)(stream); } if (rv == -1) return HT_NO_DATA; else if (rv == HT_INTERRUPTED || (rv > 0 && rv != HT_LOADED)) return HT_PARTIAL_CONTENT; else return HT_LOADED; } /* Converter stream: Network Telnet to internal character text ** ----------------------------------------------------------- ** ** The input is assumed to be in ASCII, with lines delimited ** by (13,10) pairs, These pairs are converted into (CR,LF) ** pairs in the local representation. The (CR,LF) sequence ** when found is changed to a '\n' character, the internal ** C representation of a new line. 
*/ PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char) { char c = FROMASCII(net_char); if (me->had_cr) { if (c == LF) { me->sink->isa->put_character(me->sink, '\n'); /* Newline */ me->had_cr = NO; return; } else { me->sink->isa->put_character(me->sink, CR); /* leftover */ } } me->had_cr = (c == CR); if (!me->had_cr) me->sink->isa->put_character(me->sink, c); /* normal */ } PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s) { CONST char * p; for (p = s; *p; p++) NetToText_put_character(me, *p); } PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l) { CONST char * p; for (p = s; p < (s+l); p++) NetToText_put_character(me, *p); } PRIVATE void NetToText_free ARGS1(HTStream *, me) { (me->sink->isa->_free)(me->sink); /* Close rest of pipe */ FREE(me); } PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e) { me->sink->isa->_abort(me->sink,e); /* Abort rest of pipe */ FREE(me); } /* The class structure */ PRIVATE HTStreamClass NetToTextClass = { "NetToText", NetToText_free, NetToText_abort, NetToText_put_character, NetToText_put_string, NetToText_put_block }; /* The creation method */ PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink) { HTStream* me = (HTStream*)malloc(sizeof(*me)); if (me == NULL) outofmem(__FILE__, "NetToText"); me->isa = &NetToTextClass; me->had_cr = NO; me->sink = sink; return me; }